Allow access from XSSFReader to sheet comments and headers/footers (related to bug #50076)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1037753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-11-22 15:16:45 +00:00
parent 28e0a3f566
commit 550faf0467
6 changed files with 167 additions and 7 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta1" date="2010-??-??">
<action dev="poi-developers" type="add">50076 - Allow access from XSSFReader to sheet comments and headers/footers</action>
<action dev="poi-developers" type="add">50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents</action>
<action dev="poi-developers" type="fix">50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont</action>
<action dev="poi-developers" type="fix">50154 - Allow whitespace and Unicode in OPC relationship targets</action>

View File

@ -20,6 +20,7 @@ package org.apache.poi;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.openxml4j.opc.OPCPackage;
public abstract class POIXMLTextExtractor extends POITextExtractor {
/** The POIXMLDocument that's open */
@ -56,10 +57,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
/**
* Returns opened document
*/
public final POIXMLDocument getDocument(){
public final POIXMLDocument getDocument() {
return _document;
}
/**
* Returns the opened OPCPackage that contains the document.
* This simply hands back whatever the held POIXMLDocument
* reports as its package.
*
* @return the package of the opened document
*/
public OPCPackage getPackage() {
return _document.getPackage();
}
/**
* Returns an OOXML properties text extractor for the

View File

@ -30,8 +30,10 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
@ -155,7 +157,7 @@ public class XSSFReader {
* Current CTSheet bean
*/
private CTSheet ctSheet;
/**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
@ -228,7 +230,40 @@ public class XSSFReader {
/**
* Returns the name of the current sheet, as reported
* by the CTSheet bean currently held in ctSheet.
*
* @return the name of the current sheet
*/
public String getSheetName() {
return ctSheet.getName();
}
/**
 * Loads the comments part that is related to the current sheet.
 *
 * @return the CommentsTable for the current sheet, or null when the
 *  sheet has no comments relationship, or when the comments part
 *  name is invalid or the part cannot be read
 */
public CommentsTable getSheetComments() {
    PackagePart sheetPart = getSheetPart();

    try {
        PackageRelationshipCollection commentRels =
            sheetPart.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
        // No comments relationship on this sheet
        if(commentRels.size() <= 0) {
            return null;
        }

        // There is only ever one comments relationship, so take the first
        PackageRelationship commentsRel = commentRels.getRelationship(0);
        PackagePartName commentsPartName =
            PackagingURIHelper.createPartName(commentsRel.getTargetURI());
        PackagePart commentsPart = sheetPart.getPackage().getPart(commentsPartName);
        return new CommentsTable(commentsPart, commentsRel);
    } catch (InvalidFormatException e) {
        // Reported to the caller as "no comments"
        return null;
    } catch (IOException e) {
        // Reported to the caller as "no comments"
        return null;
    }
}
/**
 * Looks up the package part of the current sheet.
 *
 * @return the entry of sheetMap stored under the id
 *  of the current CTSheet
 */
public PackagePart getSheetPart() {
    return sheetMap.get(ctSheet.getId());
}
/**
* We're read only, so remove isn't supported.
* Every call fails.
*
* @throws IllegalStateException always
*/
public void remove() {
throw new IllegalStateException("Not supported");
}

View File

@ -61,6 +61,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private boolean vIsOpen;
// Set when F start element is seen
private boolean fIsOpen;
// Set when a header/footer element is seen
private boolean hfIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
@ -76,26 +78,39 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// Gathers characters as they are seen.
private StringBuffer value = new StringBuffer();
private StringBuffer formula = new StringBuffer();
private StringBuffer headerFooter = new StringBuffer();
/**
 * Accepts objects needed while parsing.
 *
 * @param styles Table of styles
 * @param strings Table of shared strings
 * @param sheetContentsHandler Sink that is told about the rows,
 *  cells and headers/footers found while parsing
 * @param dataFormatter Formatter to keep for use by this handler
 * @param formulasNotResults Stored as given; presumably selects
 *  formula text over cell results in the output (TODO confirm
 *  against the element handling code)
 */
public XSSFSheetXMLHandler(
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        SheetContentsHandler sheetContentsHandler,
        DataFormatter dataFormatter,
        boolean formulasNotResults) {
    this.stylesTable = styles;
    this.sharedStringsTable = strings;
    this.output = sheetContentsHandler;
    this.formulasNotResults = formulasNotResults;
    this.nextDataType = xssfDataType.NUMBER;
    // Only the caller supplied formatter is used; no default
    // DataFormatter is created here
    this.formatter = dataFormatter;
}
/**
* Accepts objects needed while parsing, using a newly
* created default DataFormatter.
*
* @param styles Table of styles
* @param strings Table of shared strings
* @param sheetContentsHandler Sink passed on unchanged to the
*  constructor that also takes a DataFormatter
* @param formulasNotResults Passed on unchanged to the
*  constructor that also takes a DataFormatter
*/
public XSSFSheetXMLHandler(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetContentsHandler,
boolean formulasNotResults) {
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
}
public void startElement(String uri, String localName, String name,
@ -122,6 +137,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
fIsOpen = true;
}
}
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
"firstHeader".equals(name) || "firstFooter".equals(name) ||
"oddFooter".equals(name) || "evenFooter".equals(name)) {
hfIsOpen = true;
// Clear contents cache
headerFooter.setLength(0);
}
else if("row".equals(name)) {
int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
output.startRow(rowNum);
@ -222,6 +244,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
} else if ("row".equals(name)) {
output.endRow();
}
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
"firstHeader".equals(name)) {
hfIsOpen = false;
output.headerFooter(headerFooter.toString(), true, name);
}
else if("oddFooter".equals(name) || "evenFooter".equals(name) ||
"firstFooter".equals(name)) {
hfIsOpen = false;
output.headerFooter(headerFooter.toString(), false, name);
}
}
/**
@ -236,6 +268,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
if (fIsOpen) {
formula.append(ch, start, length);
}
if (hfIsOpen) {
headerFooter.append(ch, start, length);
}
}
/**
@ -249,5 +284,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
public void endRow();
/** A cell, with the given formatted value, was encountered */
public void cell(String cellReference, String formattedValue);
/**
* A header or footer has been encountered
*
* @param text The character content gathered for the element
* @param isHeader true for the header elements (oddHeader,
*  evenHeader, firstHeader), false for the footer elements
*  (oddFooter, evenFooter, firstFooter)
* @param tagName The name of the element that was closed
*/
public void headerFooter(String text, boolean isHeader, String tagName);
}
}

View File

@ -18,14 +18,20 @@ package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.POIXMLProperties;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
@ -43,6 +49,9 @@ import org.xml.sax.XMLReader;
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
private OPCPackage container;
private POIXMLProperties properties;
private Locale locale;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
@ -52,6 +61,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
/**
 * Creates an extractor that works directly on the given package.
 * No POIXMLDocument is handed to the parent class (null is passed),
 * and the document properties are loaded from the package here.
 *
 * @param container The package to extract from
 */
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
    super(null);

    this.container = container;
    this.properties = new POIXMLProperties(container);
}
public static void main(String[] args) throws Exception {
@ -79,22 +90,64 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
this.formulasNotResults = formulasNotResults;
}
/**
* Sets the locale to build the DataFormatter with when a
* sheet is processed. While no locale has been set (or null
* is given), processSheet uses a default DataFormatter instead.
*
* @param locale The locale to use, stored as given
*/
public void setLocale(Locale locale) {
this.locale = locale;
}
/**
* Returns the opened OPCPackage container.
* This is the package that was given to the constructor.
*
* @return the package this extractor works on
*/
@Override
public OPCPackage getPackage() {
return container;
}
/**
* Returns the core document properties, taken from the
* POIXMLProperties held by this extractor.
*
* @return the core properties
*/
@Override
public CoreProperties getCoreProperties() {
return properties.getCoreProperties();
}
/**
* Returns the extended document properties, taken from the
* POIXMLProperties held by this extractor.
*
* @return the extended properties
*/
@Override
public ExtendedProperties getExtendedProperties() {
return properties.getExtendedProperties();
}
/**
* Returns the custom document properties, taken from the
* POIXMLProperties held by this extractor.
*
* @return the custom properties
*/
@Override
public CustomProperties getCustomProperties() {
return properties.getCustomProperties();
}
/**
* Processes the given sheet
*/
public void processSheet(
SheetTextExtractor sheetExtractor,
SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
ReadOnlySharedStringsTable strings,
InputStream sheetInputStream)
throws IOException, SAXException {
DataFormatter formatter;
if(locale == null) {
formatter = new DataFormatter();
} else {
formatter = new DataFormatter(locale);
}
InputSource sheetSource = new InputSource(sheetInputStream);
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
try {
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetExtractor, formulasNotResults);
ContentHandler handler = new XSSFSheetXMLHandler(
styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
@ -162,5 +215,9 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
}
output.append(formattedValue);
}
/**
* Called for each header or footer. Intentionally does
* nothing, so headers and footers never reach the output.
*/
public void headerFooter(String text, boolean isHeader, String tagName) {
// We don't include headers in the output yet, so ignore
}
}
}

View File

@ -25,6 +25,7 @@ import junit.framework.TestCase;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.POIDataSamples;
@ -117,6 +118,28 @@ public final class TestXSSFReader extends TestCase {
assertEquals(4, count);
}
/**
 * Checks that sheet comments can be fetched while iterating:
 *  the first sheet of comments.xlsx must offer a comments table
 *  with 1 author and 3 comments, the other sheets none, and
 *  there must be 3 sheets overall.
 */
public void testComments() throws Exception {
    OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("comments.xlsx");
    XSSFReader reader = new XSSFReader(pkg);
    XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator)reader.getSheetsData();

    int sheetsSeen = 0;
    while(sheets.hasNext()) {
        // The sheet data itself isn't needed, only the iterator position
        InputStream sheetData = sheets.next();
        sheetData.close();
        sheetsSeen++;

        if(sheetsSeen != 1) {
            // Only the first sheet has comments
            assertNull(sheets.getSheetComments());
            continue;
        }

        assertNotNull(sheets.getSheetComments());
        CommentsTable comments = sheets.getSheetComments();
        assertEquals(1, comments.getNumberOfAuthors());
        assertEquals(3, comments.getNumberOfComments());
    }

    assertEquals(3, sheetsSeen);
}
/**
* Iterating over a workbook with chart sheets in it, using the