mirror of
https://github.com/apache/poi.git
synced 2025-02-08 11:04:53 +00:00
Support for extraction of footnotes from docx files, see Bugzilla 45556
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d09ab59ab0
commit
1aafa11722
@ -33,6 +33,8 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.5-beta7" date="2009-??-??">
|
<release version="3.5-beta7" date="2009-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">45556 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
|
<action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
|
<action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
|
<action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
|
||||||
@ -41,7 +43,7 @@
|
|||||||
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
|
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
|
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
|
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
|
<action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
|
<action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
|
||||||
|
@ -66,6 +66,24 @@ public final class XSSFRelation extends POIXMLRelation {
|
|||||||
"/xl/workbook.xml",
|
"/xl/workbook.xml",
|
||||||
null
|
null
|
||||||
);
|
);
|
||||||
|
public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/xl/workbook.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
|
public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||||
|
"application/vnd.ms-excel.template.macroEnabled.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/xl/workbook.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
|
public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation(
|
||||||
|
"application/vnd.ms-excel.addin.macroEnabled.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/xl/workbook.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
public static final XSSFRelation WORKSHEET = new XSSFRelation(
|
public static final XSSFRelation WORKSHEET = new XSSFRelation(
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
|
||||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
|
||||||
|
@ -19,7 +19,7 @@ package org.apache.poi.xwpf.model;
|
|||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||||
|
@ -30,15 +30,7 @@ import org.apache.xmlbeans.XmlOptions;
|
|||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.apache.poi.openxml4j.opc.*;
|
import org.apache.poi.openxml4j.opc.*;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
|
||||||
|
|
||||||
import javax.xml.namespace.QName;
|
import javax.xml.namespace.QName;
|
||||||
|
|
||||||
@ -60,6 +52,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
protected List<XWPFHyperlink> hyperlinks;
|
protected List<XWPFHyperlink> hyperlinks;
|
||||||
protected List<XWPFParagraph> paragraphs;
|
protected List<XWPFParagraph> paragraphs;
|
||||||
protected List<XWPFTable> tables;
|
protected List<XWPFTable> tables;
|
||||||
|
protected Map<Integer, XWPFFootnote> footnotes;
|
||||||
|
|
||||||
/** Handles the joy of different headers/footers for different pages */
|
/** Handles the joy of different headers/footers for different pages */
|
||||||
private XWPFHeaderFooterPolicy headerFooterPolicy;
|
private XWPFHeaderFooterPolicy headerFooterPolicy;
|
||||||
@ -87,6 +80,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
comments = new ArrayList<XWPFComment>();
|
comments = new ArrayList<XWPFComment>();
|
||||||
paragraphs = new ArrayList<XWPFParagraph>();
|
paragraphs = new ArrayList<XWPFParagraph>();
|
||||||
tables= new ArrayList<XWPFTable>();
|
tables= new ArrayList<XWPFTable>();
|
||||||
|
footnotes = new HashMap<Integer, XWPFFootnote>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
|
DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
|
||||||
@ -94,6 +88,8 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
|
|
||||||
CTBody body = ctDocument.getBody();
|
CTBody body = ctDocument.getBody();
|
||||||
|
|
||||||
|
initFootnotes();
|
||||||
|
|
||||||
// filling paragraph list
|
// filling paragraph list
|
||||||
for (CTP p : body.getPArray()) {
|
for (CTP p : body.getPArray()) {
|
||||||
paragraphs.add(new XWPFParagraph(p, this));
|
paragraphs.add(new XWPFParagraph(p, this));
|
||||||
@ -101,7 +97,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
|
|
||||||
// Get any tables
|
// Get any tables
|
||||||
for(CTTbl table : body.getTblArray()) {
|
for(CTTbl table : body.getTblArray()) {
|
||||||
tables.add(new XWPFTable(table));
|
tables.add(new XWPFTable(this, table));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort out headers and footers
|
// Sort out headers and footers
|
||||||
@ -118,7 +114,6 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
}
|
}
|
||||||
|
|
||||||
initHyperlinks();
|
initHyperlinks();
|
||||||
|
|
||||||
} catch (XmlException e) {
|
} catch (XmlException e) {
|
||||||
throw new POIXMLException(e);
|
throw new POIXMLException(e);
|
||||||
}
|
}
|
||||||
@ -139,6 +134,19 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void initFootnotes() throws XmlException, IOException {
|
||||||
|
for(POIXMLDocumentPart p : getRelations()){
|
||||||
|
String relation = p.getPackageRelationship().getRelationshipType();
|
||||||
|
if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){
|
||||||
|
FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
|
||||||
|
|
||||||
|
for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
|
||||||
|
footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new SpreadsheetML package and setup the default minimal content
|
* Create a new SpreadsheetML package and setup the default minimal content
|
||||||
*/
|
*/
|
||||||
@ -205,6 +213,15 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public XWPFFootnote getFootnoteByID(int id) {
|
||||||
|
return footnotes.get(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<XWPFFootnote> getFootnotes() {
|
||||||
|
return footnotes == null ? new ArrayList<XWPFFootnote>() : footnotes.values();
|
||||||
|
}
|
||||||
|
|
||||||
public XWPFHyperlink[] getHyperlinks() {
|
public XWPFHyperlink[] getHyperlinks() {
|
||||||
return hyperlinks.toArray(
|
return hyperlinks.toArray(
|
||||||
new XWPFHyperlink[hyperlinks.size()]
|
new XWPFHyperlink[hyperlinks.size()]
|
||||||
@ -323,7 +340,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
* @return a new table
|
* @return a new table
|
||||||
*/
|
*/
|
||||||
public XWPFTable createTable(){
|
public XWPFTable createTable(){
|
||||||
return new XWPFTable(ctDocument.getBody().addNewTbl());
|
return new XWPFTable(this, ctDocument.getBody().addNewTbl());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -333,7 +350,7 @@ public class XWPFDocument extends POIXMLDocument {
|
|||||||
* @return table
|
* @return table
|
||||||
*/
|
*/
|
||||||
public XWPFTable createTable(int rows, int cols) {
|
public XWPFTable createTable(int rows, int cols) {
|
||||||
return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols);
|
return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
43
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java
Executable file
43
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
public class XWPFFootnote implements Iterable<XWPFParagraph> {
|
||||||
|
private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
|
||||||
|
|
||||||
|
public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
|
||||||
|
for (CTP p : body.getPArray()) {
|
||||||
|
paragraphs.add(new XWPFParagraph(p, document));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<XWPFParagraph> getParagraphs() {
|
||||||
|
return paragraphs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<XWPFParagraph> iterator(){
|
||||||
|
return paragraphs.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -65,6 +65,7 @@ public abstract class XWPFHeaderFooter {
|
|||||||
new XWPFTable[headerFooter.getTblArray().length];
|
new XWPFTable[headerFooter.getTblArray().length];
|
||||||
for(int i=0; i<tables.length; i++) {
|
for(int i=0; i<tables.length; i++) {
|
||||||
tables[i] = new XWPFTable(
|
tables[i] = new XWPFTable(
|
||||||
|
null,
|
||||||
headerFooter.getTblArray(i)
|
headerFooter.getTblArray(i)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -21,26 +21,7 @@ import java.util.ArrayList;
|
|||||||
|
|
||||||
import org.apache.xmlbeans.XmlCursor;
|
import org.apache.xmlbeans.XmlCursor;
|
||||||
import org.apache.xmlbeans.XmlObject;
|
import org.apache.xmlbeans.XmlObject;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STJc;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
|
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.w3c.dom.Text;
|
import org.w3c.dom.Text;
|
||||||
|
|
||||||
@ -58,6 +39,7 @@ public class XWPFParagraph {
|
|||||||
*/
|
*/
|
||||||
private StringBuffer text = new StringBuffer();
|
private StringBuffer text = new StringBuffer();
|
||||||
private StringBuffer pictureText = new StringBuffer();
|
private StringBuffer pictureText = new StringBuffer();
|
||||||
|
private StringBuffer footnoteText = new StringBuffer();
|
||||||
|
|
||||||
|
|
||||||
protected XWPFParagraph(CTP prgrph) {
|
protected XWPFParagraph(CTP prgrph) {
|
||||||
@ -107,6 +89,23 @@ public class XWPFParagraph {
|
|||||||
if (o instanceof CTPTab) {
|
if (o instanceof CTPTab) {
|
||||||
text.append("\t");
|
text.append("\t");
|
||||||
}
|
}
|
||||||
|
//got a reference to a footnote
|
||||||
|
if (o instanceof CTFtnEdnRef) {
|
||||||
|
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
||||||
|
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||||
|
XWPFFootnote footnote = document.getFootnoteByID(ftn.getId().intValue());
|
||||||
|
|
||||||
|
boolean first = true;
|
||||||
|
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||||
|
if (!first) {
|
||||||
|
footnoteText.append("\n");
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
footnoteText.append(p.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
footnoteText.append("]");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loop over pictures inside our
|
// Loop over pictures inside our
|
||||||
@ -146,7 +145,9 @@ public class XWPFParagraph {
|
|||||||
* in it.
|
* in it.
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return getParagraphText() + getPictureText();
|
StringBuffer out = new StringBuffer();
|
||||||
|
out.append(text).append(footnoteText).append(pictureText);
|
||||||
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -164,6 +165,15 @@ public class XWPFParagraph {
|
|||||||
return pictureText.toString();
|
return pictureText.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the footnote text of the paragraph
|
||||||
|
*
|
||||||
|
* @return the footnote text or empty string if the paragraph does not have footnotes
|
||||||
|
*/
|
||||||
|
public String getFootnoteText() {
|
||||||
|
return footnoteText.toString();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Appends a new run to this paragraph
|
* Appends a new run to this paragraph
|
||||||
*
|
*
|
||||||
|
@ -40,6 +40,24 @@ public final class XWPFRelation extends POIXMLRelation {
|
|||||||
"/word/document.xml",
|
"/word/document.xml",
|
||||||
null
|
null
|
||||||
);
|
);
|
||||||
|
public static final XWPFRelation TEMPLATE = new XWPFRelation(
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/word/document.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
|
public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation(
|
||||||
|
"application/vnd.ms-word.document.macroEnabled.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/word/document.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
|
public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation(
|
||||||
|
"application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||||
|
"/word/document.xml",
|
||||||
|
null
|
||||||
|
);
|
||||||
public static final XWPFRelation FONT_TABLE = new XWPFRelation(
|
public static final XWPFRelation FONT_TABLE = new XWPFRelation(
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
|
||||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
|
||||||
@ -88,6 +106,12 @@ public final class XWPFRelation extends POIXMLRelation {
|
|||||||
null,
|
null,
|
||||||
null
|
null
|
||||||
);
|
);
|
||||||
|
public static final XWPFRelation FOOTNOTE = new XWPFRelation(
|
||||||
|
null,
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
||||||
|
@ -42,8 +42,8 @@ public class XWPFTable {
|
|||||||
private CTTbl ctTbl;
|
private CTTbl ctTbl;
|
||||||
|
|
||||||
|
|
||||||
public XWPFTable(CTTbl table, int row, int col) {
|
public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) {
|
||||||
this(table);
|
this(doc, table);
|
||||||
for (int i = 0; i < row; i++) {
|
for (int i = 0; i < row; i++) {
|
||||||
XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
|
XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
|
||||||
for (int k = 0; k < col; k++) {
|
for (int k = 0; k < col; k++) {
|
||||||
@ -54,7 +54,7 @@ public class XWPFTable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public XWPFTable(CTTbl table) {
|
public XWPFTable(XWPFDocument doc, CTTbl table) {
|
||||||
this.ctTbl = table;
|
this.ctTbl = table;
|
||||||
|
|
||||||
// is an empty table: I add one row and one column as default
|
// is an empty table: I add one row and one column as default
|
||||||
@ -65,7 +65,7 @@ public class XWPFTable {
|
|||||||
StringBuffer rowText = new StringBuffer();
|
StringBuffer rowText = new StringBuffer();
|
||||||
for (CTTc cell : row.getTcArray()) {
|
for (CTTc cell : row.getTcArray()) {
|
||||||
for (CTP ctp : cell.getPArray()) {
|
for (CTP ctp : cell.getPArray()) {
|
||||||
XWPFParagraph p = new XWPFParagraph(ctp, null);
|
XWPFParagraph p = new XWPFParagraph(ctp, doc);
|
||||||
if (rowText.length() > 0) {
|
if (rowText.length() > 0) {
|
||||||
rowText.append('\t');
|
rowText.append('\t');
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
package org.apache.poi.xwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
@ -27,79 +28,13 @@ import junit.framework.TestCase;
|
|||||||
* Tests for HXFWordExtractor
|
* Tests for HXFWordExtractor
|
||||||
*/
|
*/
|
||||||
public class TestXWPFWordExtractor extends TestCase {
|
public class TestXWPFWordExtractor extends TestCase {
|
||||||
/**
|
|
||||||
* A very simple file
|
|
||||||
*/
|
|
||||||
private XWPFDocument xmlA;
|
|
||||||
private File fileA;
|
|
||||||
/**
|
|
||||||
* A fairly complex file
|
|
||||||
*/
|
|
||||||
private XWPFDocument xmlB;
|
|
||||||
private File fileB;
|
|
||||||
/**
|
|
||||||
* With a simplish header+footer
|
|
||||||
*/
|
|
||||||
private XWPFDocument xmlC;
|
|
||||||
private File fileC;
|
|
||||||
/**
|
|
||||||
* With different header+footer on first/rest
|
|
||||||
*/
|
|
||||||
private XWPFDocument xmlD;
|
|
||||||
private File fileD;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* File with hyperlinks
|
|
||||||
*/
|
|
||||||
private XWPFDocument xmlE;
|
|
||||||
private File fileE;
|
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
|
||||||
super.setUp();
|
|
||||||
|
|
||||||
fileA = new File(
|
|
||||||
System.getProperty("HWPF.testdata.path") +
|
|
||||||
File.separator + "sample.docx"
|
|
||||||
);
|
|
||||||
fileB = new File(
|
|
||||||
System.getProperty("HWPF.testdata.path") +
|
|
||||||
File.separator + "IllustrativeCases.docx"
|
|
||||||
);
|
|
||||||
fileC = new File(
|
|
||||||
System.getProperty("HWPF.testdata.path") +
|
|
||||||
File.separator + "ThreeColHeadFoot.docx"
|
|
||||||
);
|
|
||||||
fileD = new File(
|
|
||||||
System.getProperty("HWPF.testdata.path") +
|
|
||||||
File.separator + "DiffFirstPageHeadFoot.docx"
|
|
||||||
);
|
|
||||||
fileE = new File(
|
|
||||||
System.getProperty("HWPF.testdata.path") +
|
|
||||||
File.separator + "TestDocument.docx"
|
|
||||||
);
|
|
||||||
assertTrue(fileA.exists());
|
|
||||||
assertTrue(fileB.exists());
|
|
||||||
assertTrue(fileC.exists());
|
|
||||||
assertTrue(fileD.exists());
|
|
||||||
assertTrue(fileE.exists());
|
|
||||||
|
|
||||||
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
|
|
||||||
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
|
|
||||||
xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
|
|
||||||
xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
|
|
||||||
xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text out of the simple file
|
* Get text out of the simple file
|
||||||
*/
|
*/
|
||||||
public void testGetSimpleText() throws Exception {
|
public void testGetSimpleText() throws Exception {
|
||||||
new XWPFWordExtractor(xmlA);
|
XWPFDocument doc = open("sample.docx");
|
||||||
new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
XWPFWordExtractor extractor =
|
|
||||||
new XWPFWordExtractor(xmlA);
|
|
||||||
extractor.getText();
|
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
assertTrue(text.length() > 0);
|
assertTrue(text.length() > 0);
|
||||||
@ -116,7 +51,9 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
int ps = 0;
|
int ps = 0;
|
||||||
char[] t = text.toCharArray();
|
char[] t = text.toCharArray();
|
||||||
for (int i = 0; i < t.length; i++) {
|
for (int i = 0; i < t.length; i++) {
|
||||||
if(t[i] == '\n') { ps++; }
|
if (t[i] == '\n') {
|
||||||
|
ps++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
assertEquals(3, ps);
|
assertEquals(3, ps);
|
||||||
}
|
}
|
||||||
@ -125,9 +62,8 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
* Tests getting the text out of a complex file
|
* Tests getting the text out of a complex file
|
||||||
*/
|
*/
|
||||||
public void testGetComplexText() throws Exception {
|
public void testGetComplexText() throws Exception {
|
||||||
XWPFWordExtractor extractor =
|
XWPFDocument doc = open("IllustrativeCases.docx");
|
||||||
new XWPFWordExtractor(xmlB);
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
extractor.getText();
|
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
assertTrue(text.length() > 0);
|
assertTrue(text.length() > 0);
|
||||||
@ -150,17 +86,16 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
int ps = 0;
|
int ps = 0;
|
||||||
char[] t = text.toCharArray();
|
char[] t = text.toCharArray();
|
||||||
for (int i = 0; i < t.length; i++) {
|
for (int i = 0; i < t.length; i++) {
|
||||||
if(t[i] == '\n') { ps++; }
|
if (t[i] == '\n') {
|
||||||
|
ps++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
assertEquals(103, ps);
|
assertEquals(103, ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetWithHyperlinks() throws Exception {
|
public void testGetWithHyperlinks() throws Exception {
|
||||||
XWPFWordExtractor extractor =
|
XWPFDocument doc = open("TestDocument.docx");
|
||||||
new XWPFWordExtractor(xmlE);
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
extractor.getText();
|
|
||||||
extractor.setFetchHyperlinks(true);
|
|
||||||
extractor.getText();
|
|
||||||
|
|
||||||
// Now check contents
|
// Now check contents
|
||||||
// TODO - fix once correctly handling contents
|
// TODO - fix once correctly handling contents
|
||||||
@ -184,9 +119,8 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testHeadersFooters() throws Exception {
|
public void testHeadersFooters() throws Exception {
|
||||||
XWPFWordExtractor extractor =
|
XWPFDocument doc = open("ThreeColHeadFoot.docx");
|
||||||
new XWPFWordExtractor(xmlC);
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
extractor.getText();
|
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"First header column!\tMid header\tRight header!\n" +
|
"First header column!\tMid header\tRight header!\n" +
|
||||||
@ -202,11 +136,12 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
// Now another file, expect multiple headers
|
// Now another file, expect multiple headers
|
||||||
// and multiple footers
|
// and multiple footers
|
||||||
|
doc = open("DiffFirstPageHeadFoot.docx");
|
||||||
|
extractor = new XWPFWordExtractor(doc);
|
||||||
extractor =
|
extractor =
|
||||||
new XWPFWordExtractor(xmlD);
|
new XWPFWordExtractor(doc);
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
@ -225,4 +160,44 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFootnotes() throws Exception {
|
||||||
|
XWPFDocument doc = open("footnotes.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
assertTrue(extractor.getText().contains("snoska"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testTableFootnotes() throws Exception {
|
||||||
|
XWPFDocument doc = open("table_footnotes.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
assertTrue(extractor.getText().contains("snoska"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFormFootnotes() throws Exception {
|
||||||
|
XWPFDocument doc = open("form_footnotes.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
String text = extractor.getText();
|
||||||
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO use the same logic as in HSSFTestDataSamples
|
||||||
|
private XWPFDocument open(String sampleFileName) throws IOException {
|
||||||
|
File file = new File(
|
||||||
|
System.getProperty("HWPF.testdata.path"), sampleFileName);
|
||||||
|
|
||||||
|
try {
|
||||||
|
if(!sampleFileName.equals(file.getCanonicalFile().getName())){
|
||||||
|
throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName
|
||||||
|
+ "' but actual file is '" + file.getCanonicalFile().getName() + "'");
|
||||||
|
}
|
||||||
|
} catch (IOException e){
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
return new XWPFDocument(POIXMLDocument.openPackage(file.getPath()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,14 +43,14 @@ public class TestXWPFTable extends TestCase {
|
|||||||
|
|
||||||
public void testConstructor() {
|
public void testConstructor() {
|
||||||
CTTbl ctTable=CTTbl.Factory.newInstance();
|
CTTbl ctTable=CTTbl.Factory.newInstance();
|
||||||
XWPFTable xtab=new XWPFTable(ctTable);
|
XWPFTable xtab=new XWPFTable(null, ctTable);
|
||||||
assertNotNull(xtab);
|
assertNotNull(xtab);
|
||||||
assertEquals(1,ctTable.sizeOfTrArray());
|
assertEquals(1,ctTable.sizeOfTrArray());
|
||||||
assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
|
assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
|
||||||
assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
|
assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
|
||||||
|
|
||||||
ctTable=CTTbl.Factory.newInstance();
|
ctTable=CTTbl.Factory.newInstance();
|
||||||
xtab=new XWPFTable(ctTable, 3,2);
|
xtab=new XWPFTable(null, ctTable, 3,2);
|
||||||
assertNotNull(xtab);
|
assertNotNull(xtab);
|
||||||
assertEquals(3,ctTable.sizeOfTrArray());
|
assertEquals(3,ctTable.sizeOfTrArray());
|
||||||
assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
|
assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
|
||||||
@ -67,7 +67,7 @@ public class TestXWPFTable extends TestCase {
|
|||||||
CTText text=run.addNewT();
|
CTText text=run.addNewT();
|
||||||
text.setStringValue("finally I can write!");
|
text.setStringValue("finally I can write!");
|
||||||
|
|
||||||
XWPFTable xtab=new XWPFTable(table);
|
XWPFTable xtab=new XWPFTable(null, table);
|
||||||
assertEquals("finally I can write!\n",xtab.getText());
|
assertEquals("finally I can write!\n",xtab.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,7 +84,7 @@ public class TestXWPFTable extends TestCase {
|
|||||||
r3.addNewTc().addNewP();
|
r3.addNewTc().addNewP();
|
||||||
r3.addNewTc().addNewP();
|
r3.addNewTc().addNewP();
|
||||||
|
|
||||||
XWPFTable xtab=new XWPFTable(table);
|
XWPFTable xtab=new XWPFTable(null, table);
|
||||||
assertEquals(3,xtab.getNumberOfRows());
|
assertEquals(3,xtab.getNumberOfRows());
|
||||||
assertNotNull(xtab.getRow(2));
|
assertNotNull(xtab.getRow(2));
|
||||||
|
|
||||||
@ -95,7 +95,7 @@ public class TestXWPFTable extends TestCase {
|
|||||||
assertEquals(2,table.getTrArray(0).sizeOfTcArray());
|
assertEquals(2,table.getTrArray(0).sizeOfTcArray());
|
||||||
|
|
||||||
//check creation of first row
|
//check creation of first row
|
||||||
xtab=new XWPFTable(CTTbl.Factory.newInstance());
|
xtab=new XWPFTable(null, CTTbl.Factory.newInstance());
|
||||||
assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
|
assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,7 +104,7 @@ public class TestXWPFTable extends TestCase {
|
|||||||
CTTbl table = CTTbl.Factory.newInstance();
|
CTTbl table = CTTbl.Factory.newInstance();
|
||||||
table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
|
table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
|
||||||
|
|
||||||
XWPFTable xtab=new XWPFTable(table);
|
XWPFTable xtab=new XWPFTable(null, table);
|
||||||
|
|
||||||
assertEquals(1000,xtab.getWidth());
|
assertEquals(1000,xtab.getWidth());
|
||||||
|
|
||||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx
Executable file
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx
Executable file
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx
Executable file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user