mirror of https://github.com/apache/poi.git
More work on converting the Excel extractor to the new code
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635030 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2a7d3ad154
commit
d67507164c
|
@ -18,12 +18,14 @@ package org.apache.poi.xssf.extractor;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCell;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
|
@ -92,17 +94,21 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||
|
||||
for (Object rawR : sheet) {
|
||||
Row row = (Row)rawR;
|
||||
for (Object rawC: row) {
|
||||
Cell cell = (Cell)rawC;
|
||||
for(Iterator ri = row.cellIterator(); ri.hasNext();) {
|
||||
Cell cell = (Cell)ri.next();
|
||||
|
||||
// Is it a formula one?
|
||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
||||
text.append(cell.getCellFormula());
|
||||
} else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
|
||||
text.append(cell.getRichStringCellValue().getString());
|
||||
} else {
|
||||
text.append(cell.toString());
|
||||
XSSFCell xc = (XSSFCell)cell;
|
||||
text.append(xc.getRawValue());
|
||||
}
|
||||
|
||||
text.append(",");
|
||||
if(ri.hasNext())
|
||||
text.append("\t");
|
||||
}
|
||||
text.append("\n");
|
||||
}
|
||||
|
|
|
@ -78,7 +78,7 @@ public class XSSFCell implements Cell {
|
|||
}
|
||||
|
||||
public String getCellFormula() {
|
||||
if (STCellType.STR != cell.getT()) {
|
||||
if(this.cell.getF() == null) {
|
||||
throw new NumberFormatException("You cannot get a formula from a non-formula cell");
|
||||
}
|
||||
return this.cell.getF().getStringValue();
|
||||
|
@ -94,6 +94,12 @@ public class XSSFCell implements Cell {
|
|||
}
|
||||
|
||||
public int getCellType() {
|
||||
// Detecting formulas is quite pesky,
|
||||
// as they don't get their type set
|
||||
if(this.cell.getF() != null) {
|
||||
return CELL_TYPE_FORMULA;
|
||||
}
|
||||
|
||||
switch (this.cell.getT().intValue()) {
|
||||
case STCellType.INT_B:
|
||||
return CELL_TYPE_BOOLEAN;
|
||||
|
@ -289,6 +295,13 @@ public class XSSFCell implements Cell {
|
|||
/**
 * Builds a debug-style description of this cell in the form
 * "[rowNum,cellNum] value", where value is whatever cell.getV() returns.
 */
public String toString() {
|
||||
return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the raw, underlying ooxml value for the cell
|
||||
* (the string from cell.getV(), handed back without any conversion).
* @return the value exactly as cell.getV() reports it
*/
|
||||
public String getRawValue() {
|
||||
return this.cell.getV();
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws RuntimeException if the bounds are exceeded.
|
||||
|
|
|
@ -79,6 +79,9 @@ public class XSSFRichTextString implements RichTextString {
|
|||
/**
 * Returns the content of the string field of this rich text string.
 */
public String getString() {
|
||||
return string;
|
||||
}
|
||||
/**
 * Returns the same string field that getString() returns.
 */
public String toString() {
|
||||
return string;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return string.length();
|
||||
|
|
|
@ -35,59 +35,56 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||
/**
|
||||
* A very simple file
|
||||
*/
|
||||
private XSSFWorkbook xmlA;
|
||||
private File fileA;
|
||||
private File xmlA;
|
||||
/**
|
||||
* A fairly complex file
|
||||
*/
|
||||
private XSSFWorkbook xmlB;
|
||||
private File xmlB;
|
||||
|
||||
/**
|
||||
* A fairly simple file - ooxml
|
||||
*/
|
||||
private XSSFWorkbook simpleXLSX;
|
||||
private File simpleXLSX;
|
||||
/**
|
||||
* A fairly simple file - ole2
|
||||
*/
|
||||
private HSSFWorkbook simpleXLS;
|
||||
private File simpleXLS;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
fileA = new File(
|
||||
xmlA = new File(
|
||||
System.getProperty("HSSF.testdata.path") +
|
||||
File.separator + "sample.xlsx"
|
||||
);
|
||||
File fileB = new File(
|
||||
assertTrue(xmlA.exists());
|
||||
xmlB = new File(
|
||||
System.getProperty("HSSF.testdata.path") +
|
||||
File.separator + "AverageTaxRates.xlsx"
|
||||
);
|
||||
assertTrue(xmlB.exists());
|
||||
|
||||
File fileSOOXML = new File(
|
||||
simpleXLSX = new File(
|
||||
System.getProperty("HSSF.testdata.path") +
|
||||
File.separator + "SampleSS.xlsx"
|
||||
);
|
||||
File fileSOLE2 = new File(
|
||||
simpleXLS = new File(
|
||||
System.getProperty("HSSF.testdata.path") +
|
||||
File.separator + "SampleSS.xls"
|
||||
);
|
||||
|
||||
xmlA = new XSSFWorkbook(fileA.toString());
|
||||
xmlB = new XSSFWorkbook(fileB.toString());
|
||||
|
||||
simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
|
||||
simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
|
||||
assertTrue(simpleXLS.exists());
|
||||
assertTrue(simpleXLSX.exists());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new XSSFExcelExtractor(fileA.toString());
|
||||
new XSSFExcelExtractor(xmlA);
|
||||
new XSSFExcelExtractor(xmlA.toString());
|
||||
new XSSFExcelExtractor(new XSSFWorkbook(xmlA.toString()));
|
||||
|
||||
XSSFExcelExtractor extractor =
|
||||
new XSSFExcelExtractor(xmlA);
|
||||
new XSSFExcelExtractor(xmlA.toString());
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
|
@ -110,8 +107,7 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\t4995\n" +
|
||||
"\n\n", text);
|
||||
"at\t4995\n", text);
|
||||
|
||||
// Now get formulas not their values
|
||||
extractor.setFormulasNotResults(true);
|
||||
|
@ -126,8 +122,7 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\tSUM(B1:B9)\n" +
|
||||
"\n\n", text);
|
||||
"at\tSUM(B1:B9)\n", text);
|
||||
|
||||
// With sheet names too
|
||||
extractor.setIncludeSheetNames(true);
|
||||
|
@ -143,17 +138,17 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\tSUM(B1:B9)\n\n" +
|
||||
"Sheet2\n\n" +
|
||||
"at\tSUM(B1:B9)\n" +
|
||||
"Sheet2\n" +
|
||||
"Sheet3\n"
|
||||
, text);
|
||||
}
|
||||
|
||||
public void testGetComplexText() throws Exception {
|
||||
new XSSFExcelExtractor(xmlB);
|
||||
new XSSFExcelExtractor(xmlB.toString());
|
||||
|
||||
XSSFExcelExtractor extractor =
|
||||
new XSSFExcelExtractor(xmlB);
|
||||
new XSSFExcelExtractor(new XSSFWorkbook(xmlB.toString()));
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
|
@ -174,9 +169,10 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||
*/
|
||||
public void testComparedToOLE2() throws Exception {
|
||||
XSSFExcelExtractor ooxmlExtractor =
|
||||
new XSSFExcelExtractor(simpleXLSX);
|
||||
new XSSFExcelExtractor(simpleXLSX.toString());
|
||||
ExcelExtractor ole2Extractor =
|
||||
new ExcelExtractor(simpleXLS);
|
||||
new ExcelExtractor(new HSSFWorkbook(
|
||||
new FileInputStream(simpleXLS)));
|
||||
|
||||
POITextExtractor[] extractors =
|
||||
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
|
||||
|
|
Loading…
Reference in New Issue