Allow access from XSSFReader to sheet comments and headers/footers (related to bug #50076)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1037753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-11-22 15:16:45 +00:00
parent 28e0a3f566
commit 550faf0467
6 changed files with 167 additions and 7 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.8-beta1" date="2010-??-??"> <release version="3.8-beta1" date="2010-??-??">
<action dev="poi-developers" type="add">50076 - Allow access from XSSFReader to sheet comments and headers/footers</action>
<action dev="poi-developers" type="add">50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents</action> <action dev="poi-developers" type="add">50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents</action>
<action dev="poi-developers" type="fix">50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont</action> <action dev="poi-developers" type="fix">50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont</action>
<action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action> <action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action>

View File

@ -20,6 +20,7 @@ package org.apache.poi;
import org.apache.poi.POIXMLProperties.CoreProperties; import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties; import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties; import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.openxml4j.opc.OPCPackage;
public abstract class POIXMLTextExtractor extends POITextExtractor { public abstract class POIXMLTextExtractor extends POITextExtractor {
/** The POIXMLDocument that's open */ /** The POIXMLDocument that's open */
@ -56,10 +57,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
/** /**
* Returns opened document * Returns opened document
*/ */
public final POIXMLDocument getDocument(){ public final POIXMLDocument getDocument() {
return _document; return _document;
} }
/**
 * Gives access to the OPCPackage backing the opened document.
 *
 * @return the package reported by the opened document's getPackage()
 */
public OPCPackage getPackage() {
    return getDocument().getPackage();
}
/** /**
* Returns an OOXML properties text extractor for the * Returns an OOXML properties text extractor for the

View File

@ -30,8 +30,10 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.poi.xssf.usermodel.XSSFRelation;
@ -155,7 +157,7 @@ public class XSSFReader {
* Current CTSheet bean * Current CTSheet bean
*/ */
private CTSheet ctSheet; private CTSheet ctSheet;
/** /**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order. * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
@ -228,7 +230,40 @@ public class XSSFReader {
public String getSheetName() { public String getSheetName() {
return ctSheet.getName(); return ctSheet.getName();
} }
/**
 * Looks up the comments belonging to the current sheet.
 *
 * @return a CommentsTable built from the first comments relationship of
 *  the sheet part, or null if the sheet has no comments relationship or
 *  if an InvalidFormatException / IOException occurs while loading it
 */
public CommentsTable getSheetComments() {
    PackagePart sheetPart = getSheetPart();

    try {
        // A sheet only ever has one comments relationship, if it has any
        PackageRelationshipCollection commentRels =
            sheetPart.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
        if (commentRels.size() <= 0) {
            return null;
        }

        // Resolve the relationship target to the actual part in the package
        PackageRelationship commentsRel = commentRels.getRelationship(0);
        PackagePartName commentsPartName =
            PackagingURIHelper.createPartName(commentsRel.getTargetURI());
        PackagePart commentsPart = sheetPart.getPackage().getPart(commentsPartName);
        return new CommentsTable(commentsPart, commentsRel);
    } catch (InvalidFormatException e) {
        // Failures are reported to the caller as "no comments"
        return null;
    } catch (IOException e) {
        return null;
    }
}
/**
 * Returns the package part of the current sheet, found by looking up
 * the id of the current CTSheet in the sheet map.
 */
public PackagePart getSheetPart() {
    return sheetMap.get(ctSheet.getId());
}
/**
* We're read only, so remove isn't supported
*/
public void remove() { public void remove() {
throw new IllegalStateException("Not supported"); throw new IllegalStateException("Not supported");
} }

View File

@ -61,6 +61,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private boolean vIsOpen; private boolean vIsOpen;
// Set when F start element is seen // Set when F start element is seen
private boolean fIsOpen; private boolean fIsOpen;
// Set when a header/footer element is seen
private boolean hfIsOpen;
// Set when cell start element is seen; // Set when cell start element is seen;
// used when cell close element is seen. // used when cell close element is seen.
@ -76,26 +78,39 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// Gathers characters as they are seen. // Gathers characters as they are seen.
private StringBuffer value = new StringBuffer(); private StringBuffer value = new StringBuffer();
private StringBuffer formula = new StringBuffer(); private StringBuffer formula = new StringBuffer();
private StringBuffer headerFooter = new StringBuffer();
/** /**
* Accepts objects needed while parsing. * Accepts objects needed while parsing.
* *
* @param styles Table of styles * @param styles Table of styles
* @param strings Table of shared strings * @param strings Table of shared strings
* @param cols Minimum number of columns to show
* @param target Sink for output
*/ */
public XSSFSheetXMLHandler( public XSSFSheetXMLHandler(
StylesTable styles, StylesTable styles,
ReadOnlySharedStringsTable strings, ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetContentsHandler, SheetContentsHandler sheetContentsHandler,
DataFormatter dataFormatter,
boolean formulasNotResults) { boolean formulasNotResults) {
this.stylesTable = styles; this.stylesTable = styles;
this.sharedStringsTable = strings; this.sharedStringsTable = strings;
this.output = sheetContentsHandler; this.output = sheetContentsHandler;
this.formulasNotResults = formulasNotResults; this.formulasNotResults = formulasNotResults;
this.nextDataType = xssfDataType.NUMBER; this.nextDataType = xssfDataType.NUMBER;
this.formatter = new DataFormatter(); this.formatter = dataFormatter;
}
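/**
 * Accepts objects needed while parsing, and formats cell values
 * with a newly created default DataFormatter.
 *
 * @param styles Table of styles
 * @param strings Table of shared strings
 * @param sheetContentsHandler Receives the row, cell and header/footer callbacks
 * @param formulasNotResults Forwarded unchanged to the five-argument constructor
 */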
public XSSFSheetXMLHandler(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetContentsHandler,
boolean formulasNotResults) {
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
} }
public void startElement(String uri, String localName, String name, public void startElement(String uri, String localName, String name,
@ -122,6 +137,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
fIsOpen = true; fIsOpen = true;
} }
} }
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
"firstHeader".equals(name) || "firstFooter".equals(name) ||
"oddFooter".equals(name) || "evenFooter".equals(name)) {
hfIsOpen = true;
// Clear contents cache
headerFooter.setLength(0);
}
else if("row".equals(name)) { else if("row".equals(name)) {
int rowNum = Integer.parseInt(attributes.getValue("r")) - 1; int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
output.startRow(rowNum); output.startRow(rowNum);
@ -222,6 +244,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
} else if ("row".equals(name)) { } else if ("row".equals(name)) {
output.endRow(); output.endRow();
} }
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
"firstHeader".equals(name)) {
hfIsOpen = false;
output.headerFooter(headerFooter.toString(), true, name);
}
else if("oddFooter".equals(name) || "evenFooter".equals(name) ||
"firstFooter".equals(name)) {
hfIsOpen = false;
output.headerFooter(headerFooter.toString(), false, name);
}
} }
/** /**
@ -236,6 +268,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
if (fIsOpen) { if (fIsOpen) {
formula.append(ch, start, length); formula.append(ch, start, length);
} }
if (hfIsOpen) {
headerFooter.append(ch, start, length);
}
} }
/** /**
@ -249,5 +284,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
public void endRow(); public void endRow();
/** A cell, with the given formatted value, was encountered */ /** A cell, with the given formatted value, was encountered */
public void cell(String cellReference, String formattedValue); public void cell(String cellReference, String formattedValue);
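/** A header or footer has been encountered; isHeader is true for a header and false for a footer, tagName is the name of the XML element it came from */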
public void headerFooter(String text, boolean isHeader, String tagName);
} }
} }

View File

@ -18,14 +18,20 @@ package org.apache.poi.xssf.extractor;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.POIXMLProperties;
import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
@ -43,6 +49,9 @@ import org.xml.sax.XMLReader;
*/ */
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
private OPCPackage container; private OPCPackage container;
private POIXMLProperties properties;
private Locale locale;
private boolean includeSheetNames = true; private boolean includeSheetNames = true;
private boolean formulasNotResults = false; private boolean formulasNotResults = false;
@ -52,6 +61,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
super(null); super(null);
this.container = container; this.container = container;
properties = new POIXMLProperties(container);
} }
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -79,22 +90,64 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
this.formulasNotResults = formulasNotResults; this.formulasNotResults = formulasNotResults;
} }
/**
 * Sets the locale passed to the DataFormatter that processSheet creates.
 * While no locale is set (null), processSheet uses a DataFormatter
 * created with its no-argument constructor instead.
 */
public void setLocale(Locale locale) {
this.locale = locale;
}
/**
 * Returns the opened OPCPackage container that was handed to the
 * constructor. This extractor is built without a POIXMLDocument
 * (the constructor calls super(null)), so the package is returned
 * directly from the stored field.
 */
@Override
public OPCPackage getPackage() {
return container;
}
/**
 * Returns the core document properties, taken from the
 * POIXMLProperties that the constructor created for the container.
 */
@Override
public CoreProperties getCoreProperties() {
return properties.getCoreProperties();
}
/**
 * Returns the extended document properties, taken from the
 * POIXMLProperties that the constructor created for the container.
 */
@Override
public ExtendedProperties getExtendedProperties() {
return properties.getExtendedProperties();
}
/**
 * Returns the custom document properties, taken from the
 * POIXMLProperties that the constructor created for the container.
 */
@Override
public CustomProperties getCustomProperties() {
return properties.getCustomProperties();
}
/** /**
* Processes the given sheet * Processes the given sheet
*/ */
public void processSheet( public void processSheet(
SheetTextExtractor sheetExtractor, SheetContentsHandler sheetContentsExtractor,
StylesTable styles, StylesTable styles,
ReadOnlySharedStringsTable strings, ReadOnlySharedStringsTable strings,
InputStream sheetInputStream) InputStream sheetInputStream)
throws IOException, SAXException { throws IOException, SAXException {
DataFormatter formatter;
if(locale == null) {
formatter = new DataFormatter();
} else {
formatter = new DataFormatter(locale);
}
InputSource sheetSource = new InputSource(sheetInputStream); InputSource sheetSource = new InputSource(sheetInputStream);
SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParserFactory saxFactory = SAXParserFactory.newInstance();
try { try {
SAXParser saxParser = saxFactory.newSAXParser(); SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader(); XMLReader sheetParser = saxParser.getXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetExtractor, formulasNotResults); ContentHandler handler = new XSSFSheetXMLHandler(
styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler); sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource); sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) { } catch(ParserConfigurationException e) {
@ -162,5 +215,9 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
} }
output.append(formattedValue); output.append(formattedValue);
} }
/**
 * Header/footer callback. The text is deliberately discarded, so
 * nothing is added to the extracted output.
 */
public void headerFooter(String text, boolean isHeader, String tagName) {
// We don't include headers or footers in the output yet, so ignore
}
} }
} }

View File

@ -25,6 +25,7 @@ import junit.framework.TestCase;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
@ -117,6 +118,28 @@ public final class TestXSSFReader extends TestCase {
assertEquals(4, count); assertEquals(4, count);
} }
/**
 * Checks that sheet comments are reachable while iterating over the
 * sheets of comments.xlsx: the first sheet must report one author and
 * three comments, the other sheets must report no comments table.
 */
public void testComments() throws Exception {
    OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("comments.xlsx");
    XSSFReader reader = new XSSFReader(pkg);
    XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();

    int sheetsSeen = 0;
    while (sheets.hasNext()) {
        sheetsSeen++;
        // Only the iterator position matters here, not the sheet data itself
        InputStream sheetStream = sheets.next();
        sheetStream.close();

        if (sheetsSeen != 1) {
            assertNull(sheets.getSheetComments());
            continue;
        }

        assertNotNull(sheets.getSheetComments());
        CommentsTable comments = sheets.getSheetComments();
        assertEquals(1, comments.getNumberOfAuthors());
        assertEquals(3, comments.getNumberOfComments());
    }
    assertEquals(3, sheetsSeen);
}
/** /**
* Iterating over a workbook with chart sheets in it, using the * Iterating over a workbook with chart sheets in it, using the