From 550faf04678a9f3cf6ffabc5019d2c2e591af5d7 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Mon, 22 Nov 2010 15:16:45 +0000 Subject: [PATCH] Allow access from XSSFReader to sheet comments and headers/footers (related to bug #50076) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1037753 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/POIXMLTextExtractor.java | 9 ++- .../poi/xssf/eventusermodel/XSSFReader.java | 37 ++++++++++- .../eventusermodel/XSSFSheetXMLHandler.java | 43 ++++++++++++- .../XSSFEventBasedExcelExtractor.java | 61 ++++++++++++++++++- .../xssf/eventusermodel/TestXSSFReader.java | 23 +++++++ 6 files changed, 167 insertions(+), 7 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 1172555e45..80af43bf4c 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 50076 - Allow access from XSSFReader to sheet comments and headers/footers 50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents 50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont 50154 - Allow white spaces and unicode in OPC relationship targets diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index b3c4182d59..eee1d25abd 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -20,6 +20,7 @@ package org.apache.poi; import org.apache.poi.POIXMLProperties.CoreProperties; import org.apache.poi.POIXMLProperties.CustomProperties; import org.apache.poi.POIXMLProperties.ExtendedProperties; +import org.apache.poi.openxml4j.opc.OPCPackage; public abstract class POIXMLTextExtractor extends POITextExtractor { /** The POIXMLDocument that's open */ @@ -56,10 +57,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { /** * Returns opened document */ - public final POIXMLDocument getDocument(){ + public final POIXMLDocument getDocument() { return _document; } + /** + * Returns the opened OPCPackage that contains the document + */ + public OPCPackage getPackage() { + return _document.getPackage(); + } /** * Returns an OOXML properties text extractor for the diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java index 0994ad8500..e59636e646 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java @@ -30,8 +30,10 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFRelation; @@ -155,7 +157,7 @@ public class XSSFReader { * Current CTSheet bean */ private CTSheet ctSheet; - + /** * Iterator over CTSheet objects, returns sheets in logical order. * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, @@ -228,7 +230,40 @@ public class XSSFReader { public String getSheetName() { return ctSheet.getName(); } + + /** + * Returns the comments associated with this sheet, + * or null if there aren't any + */ + public CommentsTable getSheetComments() { + PackagePart sheetPkg = getSheetPart(); + + // Do we have a comments relationship? (Only ever one if so) + try { + PackageRelationshipCollection commentsList = + sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation()); + if(commentsList.size() > 0) { + PackageRelationship comments = commentsList.getRelationship(0); + PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI()); + PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName); + return new CommentsTable(commentsPart, comments); + } + } catch (InvalidFormatException e) { + return null; + } catch (IOException e) { + return null; + } + return null; + } + + public PackagePart getSheetPart() { + String sheetId = ctSheet.getId(); + return sheetMap.get(sheetId); + } + /** + * We're read only, so remove isn't supported + */ public void remove() { throw new IllegalStateException("Not supported"); } diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 873d84eea9..df16a97abd 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -61,6 +61,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler { private boolean vIsOpen; // Set when F start element is seen private boolean fIsOpen; + // Set when a header/footer element is seen + private boolean hfIsOpen; // Set when cell start element is seen; // used when cell close element is seen. @@ -76,26 +78,39 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // Gathers characters as they are seen. private StringBuffer value = new StringBuffer(); private StringBuffer formula = new StringBuffer(); + private StringBuffer headerFooter = new StringBuffer(); /** * Accepts objects needed while parsing. * * @param styles Table of styles * @param strings Table of shared strings - * @param cols Minimum number of columns to show - * @param target Sink for output */ public XSSFSheetXMLHandler( StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, + DataFormatter dataFormatter, boolean formulasNotResults) { this.stylesTable = styles; this.sharedStringsTable = strings; this.output = sheetContentsHandler; this.formulasNotResults = formulasNotResults; this.nextDataType = xssfDataType.NUMBER; - this.formatter = new DataFormatter(); + this.formatter = dataFormatter; + } + /** + * Accepts objects needed while parsing. + * + * @param styles Table of styles + * @param strings Table of shared strings + */ + public XSSFSheetXMLHandler( + StylesTable styles, + ReadOnlySharedStringsTable strings, + SheetContentsHandler sheetContentsHandler, + boolean formulasNotResults) { + this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); } public void startElement(String uri, String localName, String name, @@ -122,6 +137,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler { fIsOpen = true; } } + else if("oddHeader".equals(name) || "evenHeader".equals(name) || + "firstHeader".equals(name) || "firstFooter".equals(name) || + "oddFooter".equals(name) || "evenFooter".equals(name)) { + hfIsOpen = true; + // Clear contents cache + headerFooter.setLength(0); + } else if("row".equals(name)) { int rowNum = Integer.parseInt(attributes.getValue("r")) - 1; output.startRow(rowNum); @@ -222,6 +244,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler { } else if ("row".equals(name)) { output.endRow(); } + else if("oddHeader".equals(name) || "evenHeader".equals(name) || + "firstHeader".equals(name)) { + hfIsOpen = false; + output.headerFooter(headerFooter.toString(), true, name); + } + else if("oddFooter".equals(name) || "evenFooter".equals(name) || + "firstFooter".equals(name)) { + hfIsOpen = false; + output.headerFooter(headerFooter.toString(), false, name); + } } /** @@ -236,6 +268,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler { if (fIsOpen) { formula.append(ch, start, length); } + if (hfIsOpen) { + headerFooter.append(ch, start, length); + } } /** @@ -249,5 +284,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { public void endRow(); /** A cell, with the given formatted value, was encountered */ public void cell(String cellReference, String formattedValue); + /** A header or footer has been encountered */ + public void headerFooter(String text, boolean isHeader, String tagName); } } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index c765ef7925..efc42cff5f 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -18,14 +18,20 @@ package org.apache.poi.xssf.extractor; import java.io.IOException; import java.io.InputStream; +import java.util.Locale; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import org.apache.poi.POIXMLProperties; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.POIXMLProperties.CoreProperties; +import org.apache.poi.POIXMLProperties.CustomProperties; +import org.apache.poi.POIXMLProperties.ExtendedProperties; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; @@ -43,6 +49,9 @@ import org.xml.sax.XMLReader; */ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { private OPCPackage container; + private POIXMLProperties properties; + + private Locale locale; private boolean includeSheetNames = true; private boolean formulasNotResults = false; @@ -52,6 +61,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { super(null); this.container = container; + + properties = new POIXMLProperties(container); } public static void main(String[] args) throws Exception { @@ -79,22 +90,64 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { this.formulasNotResults = formulasNotResults; } + public void setLocale(Locale locale) { + this.locale = locale; + } + + /** + * Returns the opened OPCPackage container. + */ + @Override + public OPCPackage getPackage() { + return container; + } + + /** + * Returns the core document properties + */ + @Override + public CoreProperties getCoreProperties() { + return properties.getCoreProperties(); + } + /** + * Returns the extended document properties + */ + @Override + public ExtendedProperties getExtendedProperties() { + return properties.getExtendedProperties(); + } + /** + * Returns the custom document properties + */ + @Override + public CustomProperties getCustomProperties() { + return properties.getCustomProperties(); + } + /** * Processes the given sheet */ public void processSheet( - SheetTextExtractor sheetExtractor, + SheetContentsHandler sheetContentsExtractor, StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, SAXException { + DataFormatter formatter; + if(locale == null) { + formatter = new DataFormatter(); + } else { + formatter = new DataFormatter(locale); + } + InputSource sheetSource = new InputSource(sheetInputStream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); try { SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); - ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetExtractor, formulasNotResults); + ContentHandler handler = new XSSFSheetXMLHandler( + styles, strings, sheetContentsExtractor, formatter, formulasNotResults); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } catch(ParserConfigurationException e) { @@ -162,5 +215,9 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { } output.append(formattedValue); } + + public void headerFooter(String text, boolean isHeader, String tagName) { + // We don't include headers in the output yet, so ignore + } } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java index 11f03dd62d..ab1113759a 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java @@ -25,6 +25,7 @@ import junit.framework.TestCase; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.util.IOUtils; import org.apache.poi.xssf.XSSFTestDataSamples; +import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.POIDataSamples; @@ -117,6 +118,28 @@ public final class TestXSSFReader extends TestCase { assertEquals(4, count); } + public void testComments() throws Exception { + OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("comments.xlsx"); + XSSFReader r = new XSSFReader(pkg); + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData(); + + int count = 0; + while(it.hasNext()) { + count++; + InputStream inp = it.next(); + inp.close(); + + if(count == 1) { + assertNotNull(it.getSheetComments()); + CommentsTable ct = it.getSheetComments(); + assertEquals(1, ct.getNumberOfAuthors()); + assertEquals(3, ct.getNumberOfComments()); + } else { + assertNull(it.getSheetComments()); + } + } + assertEquals(3, count); + } /** * Iterating over a workbook with chart sheets in it, using the