mirror of https://github.com/apache/poi.git
Support for extraction of footnotes from docx files, see Bugzilla 45556
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d09ab59ab0
commit
1aafa11722
|
@ -33,6 +33,8 @@
|
|||
|
||||
<changes>
|
||||
<release version="3.5-beta7" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">45556 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
|
||||
|
@ -41,7 +43,7 @@
|
|||
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
|
||||
|
|
|
@ -66,6 +66,24 @@ public final class XSSFRelation extends POIXMLRelation {
|
|||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.ms-excel.template.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.ms-excel.addin.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation WORKSHEET = new XSSFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.poi.xwpf.model;
|
|||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
|
||||
/**
|
||||
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||
|
|
|
@ -30,15 +30,7 @@ import org.apache.xmlbeans.XmlOptions;
|
|||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.*;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
||||
|
@ -60,6 +52,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
protected List<XWPFHyperlink> hyperlinks;
|
||||
protected List<XWPFParagraph> paragraphs;
|
||||
protected List<XWPFTable> tables;
|
||||
protected Map<Integer, XWPFFootnote> footnotes;
|
||||
|
||||
/** Handles the joy of different headers/footers for different pages */
|
||||
private XWPFHeaderFooterPolicy headerFooterPolicy;
|
||||
|
@ -87,6 +80,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
comments = new ArrayList<XWPFComment>();
|
||||
paragraphs = new ArrayList<XWPFParagraph>();
|
||||
tables= new ArrayList<XWPFTable>();
|
||||
footnotes = new HashMap<Integer, XWPFFootnote>();
|
||||
|
||||
try {
|
||||
DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
|
||||
|
@ -94,6 +88,8 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
|
||||
CTBody body = ctDocument.getBody();
|
||||
|
||||
initFootnotes();
|
||||
|
||||
// filling paragraph list
|
||||
for (CTP p : body.getPArray()) {
|
||||
paragraphs.add(new XWPFParagraph(p, this));
|
||||
|
@ -101,7 +97,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
|
||||
// Get any tables
|
||||
for(CTTbl table : body.getTblArray()) {
|
||||
tables.add(new XWPFTable(table));
|
||||
tables.add(new XWPFTable(this, table));
|
||||
}
|
||||
|
||||
// Sort out headers and footers
|
||||
|
@ -118,7 +114,6 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
}
|
||||
|
||||
initHyperlinks();
|
||||
|
||||
} catch (XmlException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
|
@ -139,6 +134,19 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
}
|
||||
}
|
||||
|
||||
private void initFootnotes() throws XmlException, IOException {
|
||||
for(POIXMLDocumentPart p : getRelations()){
|
||||
String relation = p.getPackageRelationship().getRelationshipType();
|
||||
if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){
|
||||
FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
|
||||
|
||||
for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
|
||||
footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new WordprocessingML package and set up the default minimal content
|
||||
*/
|
||||
|
@ -205,6 +213,15 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
|
||||
return null;
|
||||
}
|
||||
|
||||
public XWPFFootnote getFootnoteByID(int id) {
|
||||
return footnotes.get(id);
|
||||
}
|
||||
|
||||
public Collection<XWPFFootnote> getFootnotes() {
|
||||
return footnotes == null ? new ArrayList<XWPFFootnote>() : footnotes.values();
|
||||
}
|
||||
|
||||
public XWPFHyperlink[] getHyperlinks() {
|
||||
return hyperlinks.toArray(
|
||||
new XWPFHyperlink[hyperlinks.size()]
|
||||
|
@ -323,7 +340,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
* @return a new table
|
||||
*/
|
||||
public XWPFTable createTable(){
|
||||
return new XWPFTable(ctDocument.getBody().addNewTbl());
|
||||
return new XWPFTable(this, ctDocument.getBody().addNewTbl());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -333,7 +350,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
* @return table
|
||||
*/
|
||||
public XWPFTable createTable(int rows, int cols) {
|
||||
return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols);
|
||||
return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class XWPFFootnote implements Iterable<XWPFParagraph> {
|
||||
private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
|
||||
|
||||
public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
|
||||
for (CTP p : body.getPArray()) {
|
||||
paragraphs.add(new XWPFParagraph(p, document));
|
||||
}
|
||||
}
|
||||
|
||||
public List<XWPFParagraph> getParagraphs() {
|
||||
return paragraphs;
|
||||
}
|
||||
|
||||
public Iterator<XWPFParagraph> iterator(){
|
||||
return paragraphs.iterator();
|
||||
}
|
||||
|
||||
}
|
|
@ -65,6 +65,7 @@ public abstract class XWPFHeaderFooter {
|
|||
new XWPFTable[headerFooter.getTblArray().length];
|
||||
for(int i=0; i<tables.length; i++) {
|
||||
tables[i] = new XWPFTable(
|
||||
null,
|
||||
headerFooter.getTblArray(i)
|
||||
);
|
||||
}
|
||||
|
|
|
@ -21,26 +21,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STJc;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
|
@ -58,6 +39,7 @@ public class XWPFParagraph {
|
|||
*/
|
||||
private StringBuffer text = new StringBuffer();
|
||||
private StringBuffer pictureText = new StringBuffer();
|
||||
private StringBuffer footnoteText = new StringBuffer();
|
||||
|
||||
|
||||
protected XWPFParagraph(CTP prgrph) {
|
||||
|
@ -107,6 +89,23 @@ public class XWPFParagraph {
|
|||
if (o instanceof CTPTab) {
|
||||
text.append("\t");
|
||||
}
|
||||
//got a reference to a footnote
|
||||
if (o instanceof CTFtnEdnRef) {
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||
XWPFFootnote footnote = document.getFootnoteByID(ftn.getId().intValue());
|
||||
|
||||
boolean first = true;
|
||||
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||
if (!first) {
|
||||
footnoteText.append("\n");
|
||||
first = false;
|
||||
}
|
||||
footnoteText.append(p.getText());
|
||||
}
|
||||
|
||||
footnoteText.append("]");
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over pictures inside our
|
||||
|
@ -146,7 +145,9 @@ public class XWPFParagraph {
|
|||
* in it.
|
||||
*/
|
||||
public String getText() {
|
||||
return getParagraphText() + getPictureText();
|
||||
StringBuffer out = new StringBuffer();
|
||||
out.append(text).append(footnoteText).append(pictureText);
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -164,6 +165,15 @@ public class XWPFParagraph {
|
|||
return pictureText.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the footnote text of the paragraph
|
||||
*
|
||||
* @return the footnote text or empty string if the paragraph does not have footnotes
|
||||
*/
|
||||
public String getFootnoteText() {
|
||||
return footnoteText.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a new run to this paragraph
|
||||
*
|
||||
|
|
|
@ -40,6 +40,24 @@ public final class XWPFRelation extends POIXMLRelation {
|
|||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation TEMPLATE = new XWPFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation(
|
||||
"application/vnd.ms-word.document.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation(
|
||||
"application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation FONT_TABLE = new XWPFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
|
||||
|
@ -88,6 +106,12 @@ public final class XWPFRelation extends POIXMLRelation {
|
|||
null,
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation FOOTNOTE = new XWPFRelation(
|
||||
null,
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
||||
private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
||||
|
|
|
@ -42,8 +42,8 @@ public class XWPFTable {
|
|||
private CTTbl ctTbl;
|
||||
|
||||
|
||||
public XWPFTable(CTTbl table, int row, int col) {
|
||||
this(table);
|
||||
public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) {
|
||||
this(doc, table);
|
||||
for (int i = 0; i < row; i++) {
|
||||
XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
|
||||
for (int k = 0; k < col; k++) {
|
||||
|
@ -54,7 +54,7 @@ public class XWPFTable {
|
|||
}
|
||||
|
||||
|
||||
public XWPFTable(CTTbl table) {
|
||||
public XWPFTable(XWPFDocument doc, CTTbl table) {
|
||||
this.ctTbl = table;
|
||||
|
||||
// is an empty table: I add one row and one column as default
|
||||
|
@ -65,7 +65,7 @@ public class XWPFTable {
|
|||
StringBuffer rowText = new StringBuffer();
|
||||
for (CTTc cell : row.getTcArray()) {
|
||||
for (CTP ctp : cell.getPArray()) {
|
||||
XWPFParagraph p = new XWPFParagraph(ctp, null);
|
||||
XWPFParagraph p = new XWPFParagraph(ctp, doc);
|
||||
if (rowText.length() > 0) {
|
||||
rowText.append('\t');
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
|
@ -27,79 +28,13 @@ import junit.framework.TestCase;
|
|||
* Tests for XWPFWordExtractor
|
||||
*/
|
||||
public class TestXWPFWordExtractor extends TestCase {
|
||||
/**
|
||||
* A very simple file
|
||||
*/
|
||||
private XWPFDocument xmlA;
|
||||
private File fileA;
|
||||
/**
|
||||
* A fairly complex file
|
||||
*/
|
||||
private XWPFDocument xmlB;
|
||||
private File fileB;
|
||||
/**
|
||||
* With a simplish header+footer
|
||||
*/
|
||||
private XWPFDocument xmlC;
|
||||
private File fileC;
|
||||
/**
|
||||
* With different header+footer on first/rest
|
||||
*/
|
||||
private XWPFDocument xmlD;
|
||||
private File fileD;
|
||||
|
||||
/**
|
||||
* File with hyperlinks
|
||||
*/
|
||||
private XWPFDocument xmlE;
|
||||
private File fileE;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
fileA = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "sample.docx"
|
||||
);
|
||||
fileB = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "IllustrativeCases.docx"
|
||||
);
|
||||
fileC = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "ThreeColHeadFoot.docx"
|
||||
);
|
||||
fileD = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "DiffFirstPageHeadFoot.docx"
|
||||
);
|
||||
fileE = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "TestDocument.docx"
|
||||
);
|
||||
assertTrue(fileA.exists());
|
||||
assertTrue(fileB.exists());
|
||||
assertTrue(fileC.exists());
|
||||
assertTrue(fileD.exists());
|
||||
assertTrue(fileE.exists());
|
||||
|
||||
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
|
||||
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
|
||||
xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
|
||||
xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
|
||||
xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new XWPFWordExtractor(xmlA);
|
||||
new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
|
||||
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlA);
|
||||
extractor.getText();
|
||||
XWPFDocument doc = open("sample.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
@ -116,7 +51,9 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if(t[i] == '\n') { ps++; }
|
||||
if (t[i] == '\n') {
|
||||
ps++;
|
||||
}
|
||||
}
|
||||
assertEquals(3, ps);
|
||||
}
|
||||
|
@ -125,9 +62,8 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
* Tests getting the text out of a complex file
|
||||
*/
|
||||
public void testGetComplexText() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlB);
|
||||
extractor.getText();
|
||||
XWPFDocument doc = open("IllustrativeCases.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
@ -150,17 +86,16 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if(t[i] == '\n') { ps++; }
|
||||
if (t[i] == '\n') {
|
||||
ps++;
|
||||
}
|
||||
}
|
||||
assertEquals(103, ps);
|
||||
}
|
||||
|
||||
public void testGetWithHyperlinks() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlE);
|
||||
extractor.getText();
|
||||
extractor.setFetchHyperlinks(true);
|
||||
extractor.getText();
|
||||
XWPFDocument doc = open("TestDocument.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
// Now check contents
|
||||
// TODO - fix once correctly handling contents
|
||||
|
@ -184,9 +119,8 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
}
|
||||
|
||||
public void testHeadersFooters() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlC);
|
||||
extractor.getText();
|
||||
XWPFDocument doc = open("ThreeColHeadFoot.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertEquals(
|
||||
"First header column!\tMid header\tRight header!\n" +
|
||||
|
@ -202,11 +136,12 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
extractor.getText()
|
||||
);
|
||||
|
||||
|
||||
// Now another file, expect multiple headers
|
||||
// and multiple footers
|
||||
doc = open("DiffFirstPageHeadFoot.docx");
|
||||
extractor = new XWPFWordExtractor(doc);
|
||||
extractor =
|
||||
new XWPFWordExtractor(xmlD);
|
||||
new XWPFWordExtractor(doc);
|
||||
extractor.getText();
|
||||
|
||||
assertEquals(
|
||||
|
@ -225,4 +160,44 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||
extractor.getText()
|
||||
);
|
||||
}
|
||||
|
||||
public void testFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertTrue(extractor.getText().contains("snoska"));
|
||||
}
|
||||
|
||||
|
||||
public void testTableFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("table_footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertTrue(extractor.getText().contains("snoska"));
|
||||
}
|
||||
|
||||
public void testFormFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("form_footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||
}
|
||||
|
||||
//TODO use the same logic as in HSSFTestDataSamples
|
||||
private XWPFDocument open(String sampleFileName) throws IOException {
|
||||
File file = new File(
|
||||
System.getProperty("HWPF.testdata.path"), sampleFileName);
|
||||
|
||||
try {
|
||||
if(!sampleFileName.equals(file.getCanonicalFile().getName())){
|
||||
throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName
|
||||
+ "' but actual file is '" + file.getCanonicalFile().getName() + "'");
|
||||
}
|
||||
} catch (IOException e){
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return new XWPFDocument(POIXMLDocument.openPackage(file.getPath()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,14 +43,14 @@ public class TestXWPFTable extends TestCase {
|
|||
|
||||
public void testConstructor() {
|
||||
CTTbl ctTable=CTTbl.Factory.newInstance();
|
||||
XWPFTable xtab=new XWPFTable(ctTable);
|
||||
XWPFTable xtab=new XWPFTable(null, ctTable);
|
||||
assertNotNull(xtab);
|
||||
assertEquals(1,ctTable.sizeOfTrArray());
|
||||
assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
|
||||
assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
|
||||
|
||||
ctTable=CTTbl.Factory.newInstance();
|
||||
xtab=new XWPFTable(ctTable, 3,2);
|
||||
xtab=new XWPFTable(null, ctTable, 3,2);
|
||||
assertNotNull(xtab);
|
||||
assertEquals(3,ctTable.sizeOfTrArray());
|
||||
assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
|
||||
|
@ -67,7 +67,7 @@ public class TestXWPFTable extends TestCase {
|
|||
CTText text=run.addNewT();
|
||||
text.setStringValue("finally I can write!");
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
assertEquals("finally I can write!\n",xtab.getText());
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,7 @@ public class TestXWPFTable extends TestCase {
|
|||
r3.addNewTc().addNewP();
|
||||
r3.addNewTc().addNewP();
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
assertEquals(3,xtab.getNumberOfRows());
|
||||
assertNotNull(xtab.getRow(2));
|
||||
|
||||
|
@ -95,7 +95,7 @@ public class TestXWPFTable extends TestCase {
|
|||
assertEquals(2,table.getTrArray(0).sizeOfTcArray());
|
||||
|
||||
//check creation of first row
|
||||
xtab=new XWPFTable(CTTbl.Factory.newInstance());
|
||||
xtab=new XWPFTable(null, CTTbl.Factory.newInstance());
|
||||
assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ public class TestXWPFTable extends TestCase {
|
|||
CTTbl table = CTTbl.Factory.newInstance();
|
||||
table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
|
||||
assertEquals(1000,xtab.getWidth());
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue