From 8952570e0bcf435c612b1faffa07f09b51ff562f Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 2 Jun 2010 16:51:05 +0000 Subject: [PATCH] Another fix inspired by bug #48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950657 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/hssf/extractor/ExcelExtractor.java | 22 ++++- .../hssf/extractor/TestExcelExtractor.java | 83 ++++++++++++++----- .../hssf/usermodel/TestHSSFDataFormatter.java | 57 +++++++++++++ 4 files changed, 141 insertions(+), 22 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 016fb1eceb..b5c0164728 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them 48494 - have EventBasedExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them 49096 - add clone support to Chart begin and end records, to allow cloning of more Chart containing sheets List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal) diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index dc70fef2fb..c2e234ce34 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -26,7 +26,9 @@ import java.io.PrintStream; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.record.formula.eval.ErrorEval; import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFComment; +import org.apache.poi.hssf.usermodel.HSSFDataFormatter; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -51,6 +53,7 @@ import org.apache.poi.ss.usermodel.HeaderFooter; */ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor { private HSSFWorkbook _wb; + private HSSFDataFormatter _formatter; private boolean _includeSheetNames = true; private boolean _shouldEvaluateFormulas = true; private boolean _includeCellComments = false; @@ -60,6 +63,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p public ExcelExtractor(HSSFWorkbook wb) { super(wb); _wb = wb; + _formatter = new HSSFDataFormatter(); } public ExcelExtractor(POIFSFileSystem fs) throws IOException { this(fs.getRoot(), fs); @@ -323,8 +327,9 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p text.append(cell.getRichStringCellValue().getString()); break; case HSSFCell.CELL_TYPE_NUMERIC: - // Note - we don't apply any formatting! - text.append(cell.getNumericCellValue()); + text.append( + _formatter.formatCellValue(cell) + ); break; case HSSFCell.CELL_TYPE_BOOLEAN: text.append(cell.getBooleanCellValue()); @@ -344,7 +349,18 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p } break; case HSSFCell.CELL_TYPE_NUMERIC: - text.append(cell.getNumericCellValue()); + HSSFCellStyle style = cell.getCellStyle(); + if(style == null) { + text.append( cell.getNumericCellValue() ); + } else { + text.append( + _formatter.formatRawCellContents( + cell.getNumericCellValue(), + style.getDataFormat(), + style.getDataFormatString() + ) + ); + } break; case HSSFCell.CELL_TYPE_BOOLEAN: text.append(cell.getBooleanCellValue()); diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index bc264d097e..ed2da48bd4 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -63,11 +63,11 @@ public final class TestExcelExtractor extends TestCase { assertEquals( "Sheet1\n" + - "1000.0\t1.0\t5.0\n" + - "2000.0\t2.0\n" + - "3000.0\t3.0\n" + - "4000.0\t4.0\n" + - "5000.0\t5.0\n" + + "1000\t1\t5\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n" + "Sheet2\nSheet3\n", extractor.getText() ); @@ -76,11 +76,11 @@ public final class TestExcelExtractor extends TestCase { assertEquals( "Sheet1\n" + - "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + - "2000.0\t2.0\n" + - "3000.0\t3.0\n" + - "4000.0\t4.0\n" + - "5000.0\t5.0\n" + + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n" + "Sheet2\nSheet3\n", extractor.getText() ); @@ -173,18 +173,18 @@ public final class TestExcelExtractor extends TestCase { // Check without comments assertEquals( - "1.0\tone\n" + - "2.0\ttwo\n" + - "3.0\tthree\n", + "1\tone\n" + + "2\ttwo\n" + + "3\tthree\n", extractor.getText() ); // Now with extractor.setIncludeCellComments(true); assertEquals( - "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + - "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + - "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", + "1\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + + "2\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + + "3\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", extractor.getText() ); } @@ -199,20 +199,65 @@ public final class TestExcelExtractor extends TestCase { "Sheet1\n" + "&[TAB]\t\n" + "Hello\n" + - "11.0\t23.0\n" + "11\t23\n" )); assertTrue(padded.startsWith( "Sheet1\n" + "&[TAB]\t\n" + "Hello\n" + - "11.0\t\t\t23.0\n" + "11\t\t\t23\n" )); } + public void testFormatting() throws Exception { + ExcelExtractor extractor = createExtractor("Formatting.xls"); + extractor.setIncludeBlankCells(false); + extractor.setIncludeSheetNames(false); + String text = extractor.getText(); + + // Note - not all the formats in the file + // actually quite match what they claim to + // be, as some are auto-local builtins... + + assertTrue(text.startsWith( + "Dates, all 24th November 2006\n" + )); + assertTrue( + text.indexOf( + "yyyy/mm/dd\t2006/11/24\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "yyyy-mm-dd\t2006-11-24\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "dd-mm-yy\t24-11-06\n" + ) > -1 + ); + + assertTrue( + text.indexOf( + "nn.nn\t10.52\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "nn.nnn\t10.520\n" + ) > -1 + ); + assertTrue( + text.indexOf( + "£nn.nn\t£10.52\n" + ) > -1 + ); + } /** - * Embded in a non-excel file + * Embeded in a non-excel file */ public void testWithEmbeded() throws Exception { POIFSFileSystem fs = new POIFSFileSystem( diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java index d0e5c4183f..c0e4150b4b 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestHSSFDataFormatter.java @@ -309,6 +309,63 @@ public final class TestHSSFDataFormatter extends TestCase { assertEquals("2345", f.formatCellValue(cellA1)); } + + /** + * Tests various formattings of dates and numbers + */ + public void testFromFile() { + HSSFWorkbook workbook = HSSFTestDataSamples.openSampleWorkbook("Formatting.xls"); + HSSFSheet sheet = workbook.getSheetAt(0); + + HSSFDataFormatter f = new HSSFDataFormatter(); + + // This one is one of the nasty auto-locale changing ones... + assertEquals("dd/mm/yyyy", sheet.getRow(1).getCell(0).getStringCellValue()); + assertEquals("m/d/yy", sheet.getRow(1).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("11/24/06", f.formatCellValue(sheet.getRow(1).getCell(1))); + + assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(0).getStringCellValue()); + assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("2006/11/24", f.formatCellValue(sheet.getRow(2).getCell(1))); + + assertEquals("yyyy-mm-dd", sheet.getRow(3).getCell(0).getStringCellValue()); + assertEquals("yyyy\\-mm\\-dd", sheet.getRow(3).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("2006-11-24", f.formatCellValue(sheet.getRow(3).getCell(1))); + + assertEquals("yy/mm/dd", sheet.getRow(4).getCell(0).getStringCellValue()); + assertEquals("yy/mm/dd", sheet.getRow(4).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("06/11/24", f.formatCellValue(sheet.getRow(4).getCell(1))); + + // Another builtin fun one + assertEquals("dd/mm/yy", sheet.getRow(5).getCell(0).getStringCellValue()); + assertEquals("d/m/yy;@", sheet.getRow(5).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("24/11/06", f.formatCellValue(sheet.getRow(5).getCell(1))); + + assertEquals("dd-mm-yy", sheet.getRow(6).getCell(0).getStringCellValue()); + assertEquals("dd\\-mm\\-yy", sheet.getRow(6).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("24-11-06", f.formatCellValue(sheet.getRow(6).getCell(1))); + + + // Another builtin fun one + assertEquals("nn.nn", sheet.getRow(9).getCell(0).getStringCellValue()); + assertEquals("General", sheet.getRow(9).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.52", f.formatCellValue(sheet.getRow(9).getCell(1))); + + // text isn't quite the format rule... + assertEquals("nn.nnn", sheet.getRow(10).getCell(0).getStringCellValue()); + assertEquals("0.000", sheet.getRow(10).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.520", f.formatCellValue(sheet.getRow(10).getCell(1))); + + // text isn't quite the format rule... + assertEquals("nn.n", sheet.getRow(11).getCell(0).getStringCellValue()); + assertEquals("0.0", sheet.getRow(11).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("10.5", f.formatCellValue(sheet.getRow(11).getCell(1))); + + // text isn't quite the format rule... + assertEquals("£nn.nn", sheet.getRow(12).getCell(0).getStringCellValue()); + assertEquals("\"£\"#,##0.00", sheet.getRow(12).getCell(1).getCellStyle().getDataFormatString()); + assertEquals("£10.52", f.formatCellValue(sheet.getRow(12).getCell(1))); + } private static void log(String msg) { if (false) { // successful tests should be silent