Start updating the excel extractor to the new style code

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635026 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-03-08 17:39:56 +00:00
parent 2cc22cb45b
commit 2a7d3ad154
5 changed files with 84 additions and 81 deletions

View File

@ -21,7 +21,7 @@
<document>
<header>
<title>Busy Developers' Guide to HSSF Features</title>
<title>Busy Developers' Guide to HSSF and XSSF Features</title>
<authors>
<person email="user@poi.apache.org" name="Glen Stampoultzis" id="CO"/>
<person email="user@poi.apache.org" name="Yegor Kozlov" id="YK"/>
@ -30,8 +30,9 @@
<body>
<section><title>Busy Developers' Guide to Features</title>
<p>
Want to use HSSF read and write spreadsheets in a hurry? This guide is for you. If you're after
more in-depth coverage of the HSSF user-API please consult the <link href="how-to.html">HOWTO</link>
Want to use HSSF and XSSF to read and write spreadsheets in a hurry? This
guide is for you. If you're after more in-depth coverage of the HSSF and
XSSF user-APIs, please consult the <link href="how-to.html">HOWTO</link>
guide as it contains actual descriptions of how to use this stuff.
</p>
<section><title>Index of Features</title>

View File

@ -46,18 +46,33 @@ public abstract class POIXMLDocument {
protected POIXMLDocument() {}
protected POIXMLDocument(String path) throws IOException {
try {
this.pkg = Package.open(path);
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
// Get core part
this.corePart = this.pkg.getPart(coreDocRelationship);
} catch (InvalidFormatException e) {
throw new IOException(e.toString());
/**
 * Builds the document around an already opened package.
 * Looks up the first relationship of type CORE_DOCUMENT on the package
 * and stores the part it resolves to as the core part.
 *
 * @param pkg the opened package to read the document from
 * @throws IOException if the lookup fails with an OpenXML4JException;
 *  the message is that exception's toString() and the exception
 *  itself is attached as the cause
 */
protected POIXMLDocument(Package pkg) throws IOException {
    try {
        this.pkg = pkg;

        PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
                PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);

        // Get core part
        this.corePart = this.pkg.getPart(coreDocRelationship);
    } catch (OpenXML4JException e) {
        // Same message as before, but chain the original exception so its
        // stack trace is not lost. initCause is used instead of the
        // (String, Throwable) constructor, which needs Java 6.
        IOException ioe = new IOException(e.toString());
        ioe.initCause(e);
        throw ioe;
    }
}
/**
 * Builds the document from the package found at the given path.
 * The path is opened through openPackage(String) and the result is
 * handed to the Package-based constructor.
 *
 * @param path location of the package to open
 * @throws IOException if opening the package or the delegated
 *  constructor reports a problem
 */
protected POIXMLDocument(String path) throws IOException {
this(openPackage(path));
}
/**
 * Wrapper to open a package, reporting an invalid format as an
 * IOException.
 * Works around shortcomings in java's this() constructor calls:
 * this(...) has to be the first statement of a constructor, so it
 * cannot be placed inside a try/catch there.
 *
 * @param path location of the package to open
 * @return the package returned by Package.open(path)
 * @throws IOException if Package.open fails with an
 *  InvalidFormatException; the message is that exception's toString()
 *  and the exception itself is attached as the cause
 */
protected static Package openPackage(String path) throws IOException {
    try {
        return Package.open(path);
    } catch (InvalidFormatException e) {
        // Same message as before, but chain the original exception so its
        // stack trace is not lost. initCause is used instead of the
        // (String, Throwable) constructor, which needs Java 6.
        IOException ioe = new IOException(e.toString());
        ioe.initCause(e);
        throw ioe;
    }
}

View File

@ -20,6 +20,11 @@ import java.io.File;
import java.io.IOException;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
@ -33,10 +38,13 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
* Helper class to extract text from an OOXML Excel file
*/
public class XSSFExcelExtractor extends POIXMLTextExtractor {
private XSSFWorkbook workbook;
private Workbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
/**
 * Creates an extractor for the workbook stored at the given path.
 * An XSSFWorkbook is built from the path and passed on to the
 * workbook-based constructor.
 *
 * @param path location of the workbook file to open
 */
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(path));
}
/**
 * Creates an extractor for the workbook held in the given package.
 * An XSSFWorkbook is built from the package and passed on to the
 * workbook-based constructor.
 *
 * @param container the package containing the workbook
 */
public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(container));
}
@ -52,9 +60,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
System.exit(1);
}
POIXMLTextExtractor extractor =
new HXFExcelExtractor(HXFDocument.openPackage(
new File(args[0])
));
new XSSFExcelExtractor(args[0]);
System.out.println(extractor.getText());
}
@ -78,48 +84,27 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
public String getText() {
StringBuffer text = new StringBuffer();
CTSheet[] sheetRefs =
workbook._getHSSFXML().getSheetReferences().getSheetArray();
for(int i=0; i<sheetRefs.length; i++) {
try {
CTWorksheet sheet =
workbook._getHSSFXML().getSheet(sheetRefs[i]);
CTRow[] rows =
sheet.getSheetData().getRowArray();
if(i > 0) {
text.append("\n");
}
if(includeSheetNames) {
text.append(sheetRefs[i].getName() + "\n");
}
for(int j=0; j<rows.length; j++) {
CTCell[] cells = rows[j].getCArray();
for(int k=0; k<cells.length; k++) {
CTCell cell = cells[k];
if(k > 0) {
text.append("\t");
}
boolean done = false;
// Is it a formula one?
if(cell.getF() != null) {
if(formulasNotResults) {
text.append(cell.getF().getStringValue());
done = true;
}
}
if(!done) {
HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
text.append(uCell.getStringValue());
}
for(int i=0; i<workbook.getNumberOfSheets(); i++) {
Sheet sheet = workbook.getSheetAt(i);
if(includeSheetNames) {
text.append(workbook.getSheetName(i) + "\n");
}
for (Object rawR : sheet) {
Row row = (Row)rawR;
for (Object rawC: row) {
Cell cell = (Cell)rawC;
// Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
text.append(cell.getCellFormula());
} else {
text.append(cell.toString());
}
text.append("\n");
text.append(",");
}
} catch(Exception e) {
throw new RuntimeException(e);
text.append("\n");
}
}

View File

@ -89,7 +89,10 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook {
}
public XSSFWorkbook(String path) throws IOException {
super(path);
this(openPackage(path));
}
public XSSFWorkbook(Package pkg) throws IOException {
super(pkg);
try {
WorkbookDocument doc = WorkbookDocument.Factory.parse(getCorePart().getInputStream());
this.workbook = doc.getWorkbook();

View File

@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.extractor;
package org.apache.poi.xssf.extractor;
import java.io.File;
import java.io.FileInputStream;
@ -24,28 +24,28 @@ import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.poi.POITextExtractor;
import org.apache.poi.hssf.HSSFXML;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
import org.apache.poi.hxf.HXFDocument;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* Tests for HXFExcelExtractor
* Tests for XSSFExcelExtractor
*/
public class TestHXFExcelExtractor extends TestCase {
public class TestXSSFExcelExtractor extends TestCase {
/**
* A very simple file
*/
private HSSFXML xmlA;
private XSSFWorkbook xmlA;
private File fileA;
/**
* A fairly complex file
*/
private HSSFXML xmlB;
private XSSFWorkbook xmlB;
/**
* A fairly simple file - ooxml
*/
private HSSFXML simpleXLSX;
private XSSFWorkbook simpleXLSX;
/**
* A fairly simple file - ole2
*/
@ -54,7 +54,7 @@ public class TestHXFExcelExtractor extends TestCase {
protected void setUp() throws Exception {
super.setUp();
File fileA = new File(
fileA = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "sample.xlsx"
);
@ -72,10 +72,10 @@ public class TestHXFExcelExtractor extends TestCase {
File.separator + "SampleSS.xls"
);
xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
xmlA = new XSSFWorkbook(fileA.toString());
xmlB = new XSSFWorkbook(fileB.toString());
simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
}
@ -83,11 +83,11 @@ public class TestHXFExcelExtractor extends TestCase {
* Get text out of the simple file
*/
public void testGetSimpleText() throws Exception {
new HXFExcelExtractor(xmlA.getPackage());
new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
new XSSFExcelExtractor(fileA.toString());
new XSSFExcelExtractor(xmlA);
HXFExcelExtractor extractor =
new HXFExcelExtractor(xmlA.getPackage());
XSSFExcelExtractor extractor =
new XSSFExcelExtractor(xmlA);
extractor.getText();
String text = extractor.getText();
@ -150,11 +150,10 @@ public class TestHXFExcelExtractor extends TestCase {
}
public void testGetComplexText() throws Exception {
new HXFExcelExtractor(xmlB.getPackage());
new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
new XSSFExcelExtractor(xmlB);
HXFExcelExtractor extractor =
new HXFExcelExtractor(xmlB.getPackage());
XSSFExcelExtractor extractor =
new XSSFExcelExtractor(xmlB);
extractor.getText();
String text = extractor.getText();
@ -174,8 +173,8 @@ public class TestHXFExcelExtractor extends TestCase {
* the same file, just saved as xls and xlsx
*/
public void testComparedToOLE2() throws Exception {
HXFExcelExtractor ooxmlExtractor =
new HXFExcelExtractor(simpleXLSX.getPackage());
XSSFExcelExtractor ooxmlExtractor =
new XSSFExcelExtractor(simpleXLSX);
ExcelExtractor ole2Extractor =
new ExcelExtractor(simpleXLS);