diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index c9b7f86fb6..1d03b99cec 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + Update hssf.extractor.ExcelExtractor to optionally output blank cells too Include the sheet name in the output of examples.XLS2CSVmra 45784 - Support long chart titles in SeriesTextRecords 45777 - Throw an exception if HSSF Footer or Header is attemped to be set too long, rather than having it break during writing out diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index ae6a90ae09..976c02833a 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Update hssf.extractor.ExcelExtractor to optionally output blank cells too Include the sheet name in the output of examples.XLS2CSVmra 45784 - Support long chart titles in SeriesTextRecords 45777 - Throw an exception if HSSF Footer or Header is attemped to be set too long, rather than having it break during writing out diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index d5dc30d00c..26fb1f8c3b 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -44,6 +44,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor { private boolean includeSheetNames = true; private boolean formulasNotResults = false; private boolean includeCellComments = false; + private boolean includeBlankCells = false; public ExcelExtractor(HSSFWorkbook wb) { super(wb); @@ -73,13 +74,26 @@ public class ExcelExtractor extends POIOLE2TextExtractor { public void setIncludeCellComments(boolean includeCellComments) { this.includeCellComments = includeCellComments; } + /** + * Should blank cells be output? 
Default is to only + * output cells that are present in the file and are + * non-blank. + */ + public void setIncludeBlankCells(boolean includeBlankCells) { + this.includeBlankCells = includeBlankCells; + } /** * Retreives the text contents of the file */ public String getText() { StringBuffer text = new StringBuffer(); + + // We don't care about the difference between + // null (missing) and blank cells + wb.setMissingCellPolicy(HSSFRow.RETURN_BLANK_AS_NULL); + // Process each sheet in turn for(int i=0;i 0) { - text.append(str.toString()); - } - break; - case HSSFCell.CELL_TYPE_NUMERIC: - text.append(cell.getNumericCellValue()); - break; - case HSSFCell.CELL_TYPE_BOOLEAN: - text.append(cell.getBooleanCellValue()); - break; - case HSSFCell.CELL_TYPE_ERROR: - text.append(ErrorEval.getText(cell.getErrorCellValue())); - break; - + + if(cell == null) { + // Only output if requested + outputContents = includeBlankCells; + } else { + switch(cell.getCellType()) { + case HSSFCell.CELL_TYPE_STRING: + text.append(cell.getRichStringCellValue().getString()); + break; + case HSSFCell.CELL_TYPE_NUMERIC: + // Note - we don't apply any formatting! 
+ text.append(cell.getNumericCellValue()); + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + text.append(cell.getBooleanCellValue()); + break; + case HSSFCell.CELL_TYPE_ERROR: + text.append(ErrorEval.getText(cell.getErrorCellValue())); + break; + case HSSFCell.CELL_TYPE_FORMULA: + if(formulasNotResults) { + text.append(cell.getCellFormula()); + } else { + switch(cell.getCachedFormulaResultType()) { + case HSSFCell.CELL_TYPE_STRING: + HSSFRichTextString str = cell.getRichStringCellValue(); + if(str != null && str.length() > 0) { + text.append(str.toString()); + } + break; + case HSSFCell.CELL_TYPE_NUMERIC: + text.append(cell.getNumericCellValue()); + break; + case HSSFCell.CELL_TYPE_BOOLEAN: + text.append(cell.getBooleanCellValue()); + break; + case HSSFCell.CELL_TYPE_ERROR: + text.append(ErrorEval.getText(cell.getErrorCellValue())); + break; + + } } - } - break; - default: - throw new RuntimeException("Unexpected cell type (" + cell.getCellType() + ")"); - } - - // Output the comment, if requested and exists - HSSFComment comment = cell.getCellComment(); - if(includeCellComments && comment != null) { - // Replace any newlines with spaces, otherwise it - // breaks the output - String commentText = comment.getString().getString().replace('\n', ' '); - text.append(" Comment by "+comment.getAuthor()+": "+commentText); + break; + default: + throw new RuntimeException("Unexpected cell type (" + cell.getCellType() + ")"); + } + + // Output the comment, if requested and exists + HSSFComment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by "+comment.getAuthor()+": "+commentText); + } } // Output a tab if we're not on the last cell diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java 
b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index cb028edfa5..d715650528 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -187,6 +187,27 @@ public final class TestExcelExtractor extends TestCase { ); } + public void testWithBlank() throws Exception { + ExcelExtractor extractor = createExtractor("MissingBits.xls"); + String def = extractor.getText(); + extractor.setIncludeBlankCells(true); + String padded = extractor.getText(); + + assertTrue(def.startsWith( + "Sheet1\n" + + "&[TAB]\t\n" + + "Hello\t\n" + + "11.0\t23.0\t\n" + )); + + assertTrue(padded.startsWith( + "Sheet1\n" + + "&[TAB]\t\n" + + "Hello\t\t\t\t\t\t\t\t\t\t\t\n" + + "11.0\t\t\t23.0\t\t\t\t\t\t\t\t\n" + )); + } + /** * Embded in a non-excel file