mirror of https://github.com/apache/poi.git
Patch from Shaun Kalley from bug #56022 - XSSF Event Text Extractor header/footer support
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1563657 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7d7f5015e5
commit
aa19aeae43
|
@ -18,8 +18,10 @@ package org.apache.poi.xssf.extractor;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
|
@ -56,9 +58,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
private POIXMLProperties properties;
|
||||
|
||||
private Locale locale;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
private boolean includeTextBoxes = true;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean includeHeadersFooters = true;
|
||||
private boolean formulasNotResults = false;
|
||||
|
||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||
this(OPCPackage.open(path));
|
||||
|
@ -94,7 +97,12 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||
this.formulasNotResults = formulasNotResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should headers and footers be included? Default is true
|
||||
*/
|
||||
public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
|
||||
this.includeHeadersFooters = includeHeadersFooters;
|
||||
}
|
||||
/**
|
||||
* Should text from textboxes be included? Default is true
|
||||
*/
|
||||
|
@ -186,7 +194,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||
|
||||
StringBuffer text = new StringBuffer();
|
||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
|
||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
||||
|
||||
while (iter.hasNext()) {
|
||||
InputStream stream = iter.next();
|
||||
|
@ -195,9 +203,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
text.append('\n');
|
||||
}
|
||||
processSheet(sheetExtractor, styles, strings, stream);
|
||||
if (includeHeadersFooters) {
|
||||
sheetExtractor.appendHeaderText(text);
|
||||
}
|
||||
sheetExtractor.appendCellText(text);
|
||||
if (includeTextBoxes){
|
||||
processShapes(iter.getShapes(), text);
|
||||
}
|
||||
if (includeHeadersFooters) {
|
||||
sheetExtractor.appendFooterText(text);
|
||||
}
|
||||
sheetExtractor.reset();
|
||||
stream.close();
|
||||
}
|
||||
|
||||
|
@ -238,10 +254,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
|
||||
protected class SheetTextExtractor implements SheetContentsHandler {
|
||||
private final StringBuffer output;
|
||||
private boolean firstCellOfRow = true;
|
||||
private boolean firstCellOfRow;
|
||||
private final Map<String, String> headerFooterMap;
|
||||
|
||||
protected SheetTextExtractor(StringBuffer output) {
|
||||
this.output = output;
|
||||
protected SheetTextExtractor() {
|
||||
this.output = new StringBuffer();
|
||||
this.firstCellOfRow = true;
|
||||
this.headerFooterMap = includeHeadersFooters ? new HashMap<String, String>() : null;
|
||||
}
|
||||
|
||||
public void startRow(int rowNum) {
|
||||
|
@ -262,7 +281,84 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||
}
|
||||
|
||||
public void headerFooter(String text, boolean isHeader, String tagName) {
|
||||
// We don't include headers in the output yet, so ignore
|
||||
if (headerFooterMap != null) {
|
||||
headerFooterMap.put(tagName, text);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Append the text for the named header or footer if found.
|
||||
*/
|
||||
private void appendHeaderFooterText(StringBuffer buffer, String name) {
|
||||
String text = headerFooterMap.get(name);
|
||||
if (text != null && text.length() > 0) {
|
||||
// this is a naive way of handling the left, center, and right
|
||||
// header and footer delimiters, but it seems to be as good as
|
||||
// the method used by XSSFExcelExtractor
|
||||
text = handleHeaderFooterDelimiter(text, "&L");
|
||||
text = handleHeaderFooterDelimiter(text, "&C");
|
||||
text = handleHeaderFooterDelimiter(text, "&R");
|
||||
buffer.append(text).append('\n');
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Remove the delimiter if its found at the beginning of the text,
|
||||
* or replace it with a tab if its in the middle.
|
||||
*/
|
||||
private String handleHeaderFooterDelimiter(String text, String delimiter) {
|
||||
int index = text.indexOf(delimiter);
|
||||
if (index == 0) {
|
||||
text = text.substring(2);
|
||||
} else if (index > 0) {
|
||||
text = text.substring(0, index) + "\t" + text.substring(index + 2);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Append the text for each header type in the same order
|
||||
* they are appended in XSSFExcelExtractor.
|
||||
* @see XSSFExcelExtractor#getText()
|
||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||
*/
|
||||
private void appendHeaderText(StringBuffer buffer) {
|
||||
appendHeaderFooterText(buffer, "firstHeader");
|
||||
appendHeaderFooterText(buffer, "oddHeader");
|
||||
appendHeaderFooterText(buffer, "evenHeader");
|
||||
}
|
||||
|
||||
/**
|
||||
* Append the text for each footer type in the same order
|
||||
* they are appended in XSSFExcelExtractor.
|
||||
* @see XSSFExcelExtractor#getText()
|
||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||
*/
|
||||
private void appendFooterText(StringBuffer buffer) {
|
||||
// append the text for each footer type in the same order
|
||||
// they are appended in XSSFExcelExtractor
|
||||
appendHeaderFooterText(buffer, "firstFooter");
|
||||
appendHeaderFooterText(buffer, "oddFooter");
|
||||
appendHeaderFooterText(buffer, "evenFooter");
|
||||
}
|
||||
|
||||
/**
|
||||
* Append the cell contents we have collected.
|
||||
*/
|
||||
private void appendCellText(StringBuffer buffer) {
|
||||
buffer.append(output);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset this <code>SheetTextExtractor</code> for the next sheet.
|
||||
*/
|
||||
private void reset() {
|
||||
output.setLength(0);
|
||||
firstCellOfRow = true;
|
||||
if (headerFooterMap != null) {
|
||||
headerFooterMap.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -209,4 +209,35 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
|
|||
fixture.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we return the same output headers and footers as the
|
||||
* non-event-based XSSFExcelExtractor.
|
||||
*/
|
||||
public void testHeadersAndFootersComparedToNonEventBasedExtractor()
|
||||
throws Exception {
|
||||
|
||||
String expectedOutputWithHeadersAndFooters =
|
||||
"Sheet1\n" +
|
||||
"&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" +
|
||||
"abc\t123\n" +
|
||||
"&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n";
|
||||
|
||||
String expectedOutputWithoutHeadersAndFooters =
|
||||
"Sheet1\n" +
|
||||
"abc\t123\n";
|
||||
|
||||
XSSFExcelExtractor extractor = new XSSFExcelExtractor(
|
||||
XSSFTestDataSamples.openSampleWorkbook("headerFooterTest.xlsx"));
|
||||
assertEquals(expectedOutputWithHeadersAndFooters, extractor.getText());
|
||||
extractor.setIncludeHeadersFooters(false);
|
||||
assertEquals(expectedOutputWithoutHeadersAndFooters, extractor.getText());
|
||||
|
||||
XSSFEventBasedExcelExtractor fixture =
|
||||
new XSSFEventBasedExcelExtractor(
|
||||
XSSFTestDataSamples.openSamplePackage("headerFooterTest.xlsx"));
|
||||
assertEquals(expectedOutputWithHeadersAndFooters, fixture.getText());
|
||||
fixture.setIncludeHeadersFooters(false);
|
||||
assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue