mirror of https://github.com/apache/poi.git
Patch from Shaun Kalley from bug #56023 - Allow XSSF event model to find + return comments, and use this for the event based .xlsx text extractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1613266 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f3dba52888
commit
62bd48af74
|
@ -16,13 +16,22 @@
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.xssf.eventusermodel;
|
package org.apache.poi.xssf.eventusermodel;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Queue;
|
||||||
|
|
||||||
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
||||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||||
|
import org.apache.poi.ss.util.CellReference;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
import org.apache.poi.util.POILogger;
|
import org.apache.poi.util.POILogger;
|
||||||
|
import org.apache.poi.xssf.model.CommentsTable;
|
||||||
import org.apache.poi.xssf.model.StylesTable;
|
import org.apache.poi.xssf.model.StylesTable;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||||
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTComment;
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
@ -54,6 +63,15 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
*/
|
*/
|
||||||
private StylesTable stylesTable;
|
private StylesTable stylesTable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Table with cell comments
|
||||||
|
*/
|
||||||
|
private CommentsTable commentsTable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read only access to the shared strings table, for looking
|
||||||
|
* up (most) string cell's contents
|
||||||
|
*/
|
||||||
private ReadOnlySharedStringsTable sharedStringsTable;
|
private ReadOnlySharedStringsTable sharedStringsTable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -78,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
private short formatIndex;
|
private short formatIndex;
|
||||||
private String formatString;
|
private String formatString;
|
||||||
private final DataFormatter formatter;
|
private final DataFormatter formatter;
|
||||||
|
private int rowNum;
|
||||||
private String cellRef;
|
private String cellRef;
|
||||||
private boolean formulasNotResults;
|
private boolean formulasNotResults;
|
||||||
|
|
||||||
|
@ -86,6 +105,31 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
private StringBuffer formula = new StringBuffer();
|
private StringBuffer formula = new StringBuffer();
|
||||||
private StringBuffer headerFooter = new StringBuffer();
|
private StringBuffer headerFooter = new StringBuffer();
|
||||||
|
|
||||||
|
private Queue<CellReference> commentCellRefs;
|
||||||
|
|
||||||
|
/**
 * Accepts objects needed while parsing, optionally including the
 *  comments that belong to the sheet.
 *
 * @param styles Table of styles
 * @param comments Table of cell comments; may be <code>null</code>, in
 *  which case no comments are looked up or reported
 * @param strings Table of shared strings
 * @param sheetContentsHandler Receives the row, cell, comment and
 *  header/footer callbacks
 * @param dataFormatter Formatter kept for use while parsing
 * @param formulasNotResults Flag kept for use while parsing (presumably
 *  selects formula text over cached results - confirm against the cell handling)
 */
public XSSFSheetXMLHandler(
        StylesTable styles,
        CommentsTable comments,
        ReadOnlySharedStringsTable strings,
        SheetContentsHandler sheetContentsHandler,
        DataFormatter dataFormatter,
        boolean formulasNotResults) {
    // Collaborators used to format and report what is found
    this.output = sheetContentsHandler;
    this.formatter = dataFormatter;
    this.formulasNotResults = formulasNotResults;

    // Lookup tables for the sheet being parsed
    this.stylesTable = styles;
    this.sharedStringsTable = strings;
    this.commentsTable = comments;

    // Until a cell says otherwise, its value is treated as a number
    this.nextDataType = xssfDataType.NUMBER;

    // Must run last: it reads the comments table assigned above
    init();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accepts objects needed while parsing.
|
* Accepts objects needed while parsing.
|
||||||
*
|
*
|
||||||
|
@ -98,13 +142,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
SheetContentsHandler sheetContentsHandler,
|
SheetContentsHandler sheetContentsHandler,
|
||||||
DataFormatter dataFormatter,
|
DataFormatter dataFormatter,
|
||||||
boolean formulasNotResults) {
|
boolean formulasNotResults) {
|
||||||
this.stylesTable = styles;
|
this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
|
||||||
this.sharedStringsTable = strings;
|
|
||||||
this.output = sheetContentsHandler;
|
|
||||||
this.formulasNotResults = formulasNotResults;
|
|
||||||
this.nextDataType = xssfDataType.NUMBER;
|
|
||||||
this.formatter = dataFormatter;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accepts objects needed while parsing.
|
* Accepts objects needed while parsing.
|
||||||
*
|
*
|
||||||
|
@ -119,6 +159,16 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
|
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void init() {
|
||||||
|
if (commentsTable != null) {
|
||||||
|
commentCellRefs = new LinkedList<CellReference>();
|
||||||
|
List<CTComment> commentList = commentsTable.getCTComments().getCommentList().getCommentList();
|
||||||
|
for (CTComment comment : commentList) {
|
||||||
|
commentCellRefs.add(new CellReference(comment.getRef()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private boolean isTextTag(String name) {
|
private boolean isTextTag(String name) {
|
||||||
if("v".equals(name)) {
|
if("v".equals(name)) {
|
||||||
// Easy, normal v text tag
|
// Easy, normal v text tag
|
||||||
|
@ -190,7 +240,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
headerFooter.setLength(0);
|
headerFooter.setLength(0);
|
||||||
}
|
}
|
||||||
else if("row".equals(name)) {
|
else if("row".equals(name)) {
|
||||||
int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
|
rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
|
||||||
output.startRow(rowNum);
|
output.startRow(rowNum);
|
||||||
}
|
}
|
||||||
// c => cell
|
// c => cell
|
||||||
|
@ -304,14 +354,25 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do we have a comment for this cell?
|
||||||
|
checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
|
||||||
|
XSSFComment comment = commentsTable != null ? commentsTable.findCellComment(cellRef) : null;
|
||||||
|
|
||||||
// Output
|
// Output
|
||||||
output.cell(cellRef, thisStr);
|
output.cell(cellRef, thisStr, comment);
|
||||||
} else if ("f".equals(name)) {
|
} else if ("f".equals(name)) {
|
||||||
fIsOpen = false;
|
fIsOpen = false;
|
||||||
} else if ("is".equals(name)) {
|
} else if ("is".equals(name)) {
|
||||||
isIsOpen = false;
|
isIsOpen = false;
|
||||||
} else if ("row".equals(name)) {
|
} else if ("row".equals(name)) {
|
||||||
output.endRow();
|
// Handle any "missing" cells which had comments attached
|
||||||
|
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
|
||||||
|
|
||||||
|
// Finish up the row
|
||||||
|
output.endRow(rowNum);
|
||||||
|
} else if ("sheetData".equals(name)) {
|
||||||
|
// Handle any "missing" cells which had comments attached
|
||||||
|
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
|
||||||
}
|
}
|
||||||
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
|
else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
|
||||||
"firstHeader".equals(name)) {
|
"firstHeader".equals(name)) {
|
||||||
|
@ -343,6 +404,90 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do a check for, and output, comments in otherwise empty cells.
|
||||||
|
*/
|
||||||
|
private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) {
|
||||||
|
if (commentCellRefs != null && !commentCellRefs.isEmpty()) {
|
||||||
|
// If we've reached the end of the sheet data, output any
|
||||||
|
// comments we haven't yet already handled
|
||||||
|
if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) {
|
||||||
|
while (!commentCellRefs.isEmpty()) {
|
||||||
|
outputEmptyCellComment(commentCellRefs.remove());
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At the end of a row, handle any comments for "missing" rows before us
|
||||||
|
if (this.cellRef == null) {
|
||||||
|
if (type == EmptyCellCommentsCheckType.END_OF_ROW) {
|
||||||
|
while (!commentCellRefs.isEmpty()) {
|
||||||
|
if (commentCellRefs.peek().getRow() == rowNum) {
|
||||||
|
outputEmptyCellComment(commentCellRefs.remove());
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CellReference nextCommentCellRef;
|
||||||
|
do {
|
||||||
|
CellReference cellRef = new CellReference(this.cellRef);
|
||||||
|
CellReference peekCellRef = commentCellRefs.peek();
|
||||||
|
if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) {
|
||||||
|
// remove the comment cell ref from the list if we're about to handle it alongside the cell content
|
||||||
|
commentCellRefs.remove();
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
// fill in any gaps if there are empty cells with comment mixed in with non-empty cells
|
||||||
|
int comparison = cellRefComparator.compare(peekCellRef, cellRef);
|
||||||
|
if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) {
|
||||||
|
nextCommentCellRef = commentCellRefs.remove();
|
||||||
|
outputEmptyCellComment(nextCommentCellRef);
|
||||||
|
} else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) {
|
||||||
|
nextCommentCellRef = commentCellRefs.remove();
|
||||||
|
outputEmptyCellComment(nextCommentCellRef);
|
||||||
|
} else {
|
||||||
|
nextCommentCellRef = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (nextCommentCellRef != null && !commentCellRefs.isEmpty());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Output an empty-cell comment.
|
||||||
|
*/
|
||||||
|
private void outputEmptyCellComment(CellReference cellRef) {
|
||||||
|
String cellRefString = cellRef.formatAsString();
|
||||||
|
XSSFComment comment = commentsTable.findCellComment(cellRefString);
|
||||||
|
output.emptyCellComment(cellRefString, comment);
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum EmptyCellCommentsCheckType {
|
||||||
|
CELL,
|
||||||
|
END_OF_ROW,
|
||||||
|
END_OF_SHEET_DATA
|
||||||
|
}
|
||||||
|
private static final Comparator<CellReference> cellRefComparator = new Comparator<CellReference>() {
|
||||||
|
@Override
|
||||||
|
public int compare(CellReference o1, CellReference o2) {
|
||||||
|
int result = compare(o1.getRow(), o2.getRow());
|
||||||
|
if (result == 0) {
|
||||||
|
result = compare(o1.getCol(), o2.getCol());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
public int compare(int x, int y) {
|
||||||
|
return (x < y) ? -1 : ((x == y) ? 0 : 1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* You need to implement this to handle the results
|
* You need to implement this to handle the results
|
||||||
* of the sheet parsing.
|
* of the sheet parsing.
|
||||||
|
@ -351,9 +496,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||||
/** A row with the (zero based) row number has started */
|
/** A row with the (zero based) row number has started */
|
||||||
public void startRow(int rowNum);
|
public void startRow(int rowNum);
|
||||||
/** A row with the (zero based) row number has ended */
|
/** A row with the (zero based) row number has ended */
|
||||||
public void endRow();
|
public void endRow(int rowNum);
|
||||||
/** A cell, with the given formatted value, was encountered */
|
/** A cell, with the given formatted value, and possibly a comment, was encountered */
|
||||||
public void cell(String cellReference, String formattedValue);
|
public void cell(String cellReference, String formattedValue, XSSFComment comment);
|
||||||
|
/** A comment for an otherwise-empty cell was encountered */
|
||||||
|
public void emptyCellComment(String cellReference, XSSFComment comment);
|
||||||
/** A header or footer has been encountered */
|
/** A header or footer has been encountered */
|
||||||
public void headerFooter(String text, boolean isHeader, String tagName);
|
public void headerFooter(String text, boolean isHeader, String tagName);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,9 @@ import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
|
||||||
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
|
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
|
||||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
||||||
|
import org.apache.poi.xssf.model.CommentsTable;
|
||||||
import org.apache.poi.xssf.model.StylesTable;
|
import org.apache.poi.xssf.model.StylesTable;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFShape;
|
import org.apache.poi.xssf.usermodel.XSSFShape;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
|
import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
@ -60,6 +62,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
private Locale locale;
|
private Locale locale;
|
||||||
private boolean includeTextBoxes = true;
|
private boolean includeTextBoxes = true;
|
||||||
private boolean includeSheetNames = true;
|
private boolean includeSheetNames = true;
|
||||||
|
private boolean includeCellComments = false;
|
||||||
private boolean includeHeadersFooters = true;
|
private boolean includeHeadersFooters = true;
|
||||||
private boolean formulasNotResults = false;
|
private boolean formulasNotResults = false;
|
||||||
|
|
||||||
|
@ -112,11 +115,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Would control the inclusion of cell comments from the document,
|
* Should cell comments be included? Default is false
|
||||||
* if we supported it
|
|
||||||
*/
|
*/
|
||||||
public void setIncludeCellComments(boolean includeCellComments) {
|
public void setIncludeCellComments(boolean includeCellComments) {
|
||||||
throw new IllegalStateException("Comment extraction not supported in streaming mode, please use XSSFExcelExtractor");
|
this.includeCellComments = includeCellComments;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLocale(Locale locale) {
|
public void setLocale(Locale locale) {
|
||||||
|
@ -159,6 +161,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
public void processSheet(
|
public void processSheet(
|
||||||
SheetContentsHandler sheetContentsExtractor,
|
SheetContentsHandler sheetContentsExtractor,
|
||||||
StylesTable styles,
|
StylesTable styles,
|
||||||
|
CommentsTable comments,
|
||||||
ReadOnlySharedStringsTable strings,
|
ReadOnlySharedStringsTable strings,
|
||||||
InputStream sheetInputStream)
|
InputStream sheetInputStream)
|
||||||
throws IOException, SAXException {
|
throws IOException, SAXException {
|
||||||
|
@ -176,7 +179,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
SAXParser saxParser = saxFactory.newSAXParser();
|
SAXParser saxParser = saxFactory.newSAXParser();
|
||||||
XMLReader sheetParser = saxParser.getXMLReader();
|
XMLReader sheetParser = saxParser.getXMLReader();
|
||||||
ContentHandler handler = new XSSFSheetXMLHandler(
|
ContentHandler handler = new XSSFSheetXMLHandler(
|
||||||
styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
|
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
|
||||||
sheetParser.setContentHandler(handler);
|
sheetParser.setContentHandler(handler);
|
||||||
sheetParser.parse(sheetSource);
|
sheetParser.parse(sheetSource);
|
||||||
} catch(ParserConfigurationException e) {
|
} catch(ParserConfigurationException e) {
|
||||||
|
@ -203,7 +206,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
text.append(iter.getSheetName());
|
text.append(iter.getSheetName());
|
||||||
text.append('\n');
|
text.append('\n');
|
||||||
}
|
}
|
||||||
processSheet(sheetExtractor, styles, strings, stream);
|
CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
|
||||||
|
processSheet(sheetExtractor, styles, comments, strings, stream);
|
||||||
if (includeHeadersFooters) {
|
if (includeHeadersFooters) {
|
||||||
sheetExtractor.appendHeaderText(text);
|
sheetExtractor.appendHeaderText(text);
|
||||||
}
|
}
|
||||||
|
@ -268,17 +272,32 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
firstCellOfRow = true;
|
firstCellOfRow = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void endRow() {
|
public void endRow(int rowNum) {
|
||||||
output.append('\n');
|
output.append('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
public void cell(String cellRef, String formattedValue) {
|
public void cell(String cellRef, String formattedValue, XSSFComment comment) {
|
||||||
if(firstCellOfRow) {
|
if(firstCellOfRow) {
|
||||||
firstCellOfRow = false;
|
firstCellOfRow = false;
|
||||||
} else {
|
} else {
|
||||||
output.append('\t');
|
output.append('\t');
|
||||||
}
|
}
|
||||||
output.append(formattedValue);
|
if (formattedValue != null) {
|
||||||
|
output.append(formattedValue);
|
||||||
|
}
|
||||||
|
if (includeCellComments && comment != null) {
|
||||||
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
|
output.append(formattedValue != null ? " Comment by " : "Comment by ");
|
||||||
|
if (commentText.startsWith(comment.getAuthor() + ": ")) {
|
||||||
|
output.append(commentText);
|
||||||
|
} else {
|
||||||
|
output.append(comment.getAuthor()).append(": ").append(commentText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void emptyCellComment(String cellRef, XSSFComment comment) {
|
||||||
|
cell(cellRef, null, comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void headerFooter(String text, boolean isHeader, String tagName) {
|
public void headerFooter(String text, boolean isHeader, String tagName) {
|
||||||
|
@ -287,7 +306,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append the text for the named header or footer if found.
|
* Append the text for the named header or footer if found.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,13 +20,13 @@ package org.apache.poi.xssf.extractor;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for {@link XSSFEventBasedExcelExtractor}
|
* Tests for {@link XSSFEventBasedExcelExtractor}
|
||||||
*/
|
*/
|
||||||
|
@ -240,4 +240,68 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
|
||||||
fixture.setIncludeHeadersFooters(false);
|
fixture.setIncludeHeadersFooters(false);
|
||||||
assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
|
assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that XSSFEventBasedExcelExtractor outputs comments when specified.
|
||||||
|
* The output will contain two improvements over the output from
|
||||||
|
* XSSFExcelExtractor in that (1) comments from empty cells will be
|
||||||
|
* outputted, and (2) the author will not be outputted twice.
|
||||||
|
* <p>
|
||||||
|
* This test will need to be modified if these improvements are ported to
|
||||||
|
* XSSFExcelExtractor.
|
||||||
|
*/
|
||||||
|
public void testCommentsComparedToNonEventBasedExtractor()
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
|
String expectedOutputWithoutComments =
|
||||||
|
"Sheet1\n" +
|
||||||
|
"\n" +
|
||||||
|
"abc\n" +
|
||||||
|
"\n" +
|
||||||
|
"123\n" +
|
||||||
|
"\n" +
|
||||||
|
"\n" +
|
||||||
|
"\n";
|
||||||
|
|
||||||
|
String nonEventBasedExtractorOutputWithComments =
|
||||||
|
"Sheet1\n" +
|
||||||
|
"\n" +
|
||||||
|
"abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" +
|
||||||
|
"\n" +
|
||||||
|
"123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" +
|
||||||
|
"\n" +
|
||||||
|
"\n" +
|
||||||
|
"\n";
|
||||||
|
|
||||||
|
String eventBasedExtractorOutputWithComments =
|
||||||
|
"Sheet1\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" +
|
||||||
|
"abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" +
|
||||||
|
"Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n";
|
||||||
|
|
||||||
|
XSSFExcelExtractor extractor = new XSSFExcelExtractor(
|
||||||
|
XSSFTestDataSamples.openSampleWorkbook("commentTest.xlsx"));
|
||||||
|
try {
|
||||||
|
assertEquals(expectedOutputWithoutComments, extractor.getText());
|
||||||
|
extractor.setIncludeCellComments(true);
|
||||||
|
assertEquals(nonEventBasedExtractorOutputWithComments, extractor.getText());
|
||||||
|
} finally {
|
||||||
|
extractor.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
XSSFEventBasedExcelExtractor fixture =
|
||||||
|
new XSSFEventBasedExcelExtractor(
|
||||||
|
XSSFTestDataSamples.openSamplePackage("commentTest.xlsx"));
|
||||||
|
try {
|
||||||
|
assertEquals(expectedOutputWithoutComments, fixture.getText());
|
||||||
|
fixture.setIncludeCellComments(true);
|
||||||
|
assertEquals(eventBasedExtractorOutputWithComments, fixture.getText());
|
||||||
|
} finally {
|
||||||
|
fixture.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue