mirror of https://github.com/apache/poi.git
Improved hyperlink and comment fetching for xwpf text extraction, based on the patch from bug #44821
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@651979 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24ba833f03
commit
61405ba81f
|
@ -17,6 +17,9 @@
|
||||||
package org.apache.poi.xwpf;
|
package org.apache.poi.xwpf;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
@ -24,12 +27,22 @@ import org.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
|
import org.openxml4j.opc.PackageRelationship;
|
||||||
import org.openxml4j.opc.PackageRelationshipCollection;
|
import org.openxml4j.opc.PackageRelationshipCollection;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFComment;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Experimental class to do low level processing
|
* Experimental class to do low level processing
|
||||||
|
@ -48,15 +61,59 @@ public class XWPFDocument extends POIXMLDocument {
|
||||||
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
||||||
public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
|
public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
|
||||||
public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
|
public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
|
||||||
public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
|
public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
|
||||||
|
public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
|
||||||
|
|
||||||
private DocumentDocument wordDoc;
|
private DocumentDocument wordDoc;
|
||||||
|
protected List<XWPFComment> comments;
|
||||||
|
protected List<XWPFHyperlink> hyperlinks;
|
||||||
|
protected List<XWPFParagraph> paragraphs;
|
||||||
|
protected List<XWPFTable> tables;
|
||||||
|
|
||||||
public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
|
public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||||
super(container);
|
super(container);
|
||||||
|
|
||||||
|
hyperlinks = new LinkedList<XWPFHyperlink>();
|
||||||
|
comments = new LinkedList<XWPFComment>();
|
||||||
|
paragraphs = new LinkedList<XWPFParagraph>();
|
||||||
|
tables= new LinkedList<XWPFTable>();
|
||||||
|
|
||||||
wordDoc =
|
wordDoc =
|
||||||
DocumentDocument.Factory.parse(getCorePart().getInputStream());
|
DocumentDocument.Factory.parse(getCorePart().getInputStream());
|
||||||
|
|
||||||
|
// filling paragraph list
|
||||||
|
for (CTP p : getDocumentBody().getPArray()) {
|
||||||
|
paragraphs.add(new XWPFParagraph(p, this));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the hyperlinks
|
||||||
|
// TODO: make me optional/separated in private function
|
||||||
|
try {
|
||||||
|
Iterator <PackageRelationship> relIter =
|
||||||
|
getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE).iterator();
|
||||||
|
while(relIter.hasNext()) {
|
||||||
|
PackageRelationship rel = relIter.next();
|
||||||
|
hyperlinks.add(new XWPFHyperlink(rel.getId(), rel.getTargetURI().toString()));
|
||||||
|
}
|
||||||
|
} catch(Exception e) {
|
||||||
|
throw new OpenXML4JException(e.getLocalizedMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the comments, if there are any
|
||||||
|
PackageRelationshipCollection commentsRel = getCmntRelations();
|
||||||
|
if(commentsRel != null && commentsRel.size() > 0) {
|
||||||
|
PackagePart commentsPart = getTargetPart(commentsRel.getRelationship(0));
|
||||||
|
CommentsDocument cmntdoc = CommentsDocument.Factory.parse(commentsPart.getInputStream());
|
||||||
|
for(CTComment ctcomment : cmntdoc.getComments().getCommentArray())
|
||||||
|
{
|
||||||
|
comments.add(new XWPFComment(ctcomment));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(CTTbl table : getDocumentBody().getTblArray())
|
||||||
|
{
|
||||||
|
tables.add(new XWPFTable(table));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -66,6 +123,42 @@ public class XWPFDocument extends POIXMLDocument {
|
||||||
return wordDoc.getDocument();
|
return wordDoc.getDocument();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Iterator<XWPFParagraph> getParagraphsIterator()
|
||||||
|
{
|
||||||
|
return paragraphs.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<XWPFTable> getTablesIterator()
|
||||||
|
{
|
||||||
|
return tables.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFHyperlink getHyperlinkByID(String id)
|
||||||
|
{
|
||||||
|
Iterator<XWPFHyperlink> iter = hyperlinks.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
XWPFHyperlink link = iter.next();
|
||||||
|
if(link.getId().equals(id))
|
||||||
|
return link;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFComment getCommentByID(String id)
|
||||||
|
{
|
||||||
|
Iterator<XWPFComment> iter = comments.iterator();
|
||||||
|
while(iter.hasNext())
|
||||||
|
{
|
||||||
|
XWPFComment comment = iter.next();
|
||||||
|
if(comment.getId().equals(id))
|
||||||
|
return comment;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the low level body of the document
|
* Returns the low level body of the document
|
||||||
*/
|
*/
|
||||||
|
@ -91,18 +184,10 @@ public class XWPFDocument extends POIXMLDocument {
|
||||||
StylesDocument.Factory.parse(parts[0].getInputStream());
|
StylesDocument.Factory.parse(parts[0].getInputStream());
|
||||||
return sd.getStyles();
|
return sd.getStyles();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
protected PackageRelationshipCollection getCmntRelations() throws InvalidFormatException
|
||||||
* Returns all the hyperlink relations for the file.
|
{
|
||||||
* You'll generally want to get the target to get
|
return getCorePart().getRelationshipsByType(COMMENT_RELATION_TYPE);
|
||||||
* the destination of the hyperlink
|
|
||||||
*/
|
|
||||||
public PackageRelationshipCollection getHyperlinks() {
|
|
||||||
try {
|
|
||||||
return getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE);
|
|
||||||
} catch(InvalidFormatException e) {
|
|
||||||
// Should never happen
|
|
||||||
throw new IllegalStateException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,19 +17,19 @@
|
||||||
package org.apache.poi.xwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.xwpf.XWPFDocument;
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
|
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
|
||||||
|
import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
|
||||||
|
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxml4j.opc.PackageRelationship;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper class to extract text from an OOXML Word file
|
* Helper class to extract text from an OOXML Word file
|
||||||
|
@ -45,6 +45,15 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||||
super(document);
|
super(document);
|
||||||
this.document = document;
|
this.document = document;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should we also fetch the hyperlinks, when fetching
|
||||||
|
* the text content? Default is to only output the
|
||||||
|
* hyperlink label, and not the contents
|
||||||
|
*/
|
||||||
|
public void setFetchHyperlinks(boolean fetch) {
|
||||||
|
fetchHyperlinks = fetch;
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
if(args.length < 1) {
|
if(args.length < 1) {
|
||||||
|
@ -59,56 +68,21 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Should we also fetch the hyperlinks, when fetching
|
|
||||||
* the text content? Default is to only output the
|
|
||||||
* hyperlink label, and not the contents
|
|
||||||
*/
|
|
||||||
public void setFetchHyperlinks(boolean fetch) {
|
|
||||||
fetchHyperlinks = fetch;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getText() {
|
public String getText() {
|
||||||
CTBody body = document.getDocumentBody();
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
// Loop over paragraphs
|
|
||||||
CTP[] ps = body.getPArray();
|
|
||||||
for (int i = 0; i < ps.length; i++) {
|
|
||||||
// Loop over ranges and hyperlinks
|
|
||||||
// TODO - properly intersperse ranges and hyperlinks
|
|
||||||
CTR[] rs = ps[i].getRArray();
|
|
||||||
for(int j = 0; j < rs.length; j++) {
|
|
||||||
// Loop over text runs
|
|
||||||
CTText[] texts = rs[j].getTArray();
|
|
||||||
for (int k = 0; k < texts.length; k++) {
|
|
||||||
text.append(
|
|
||||||
texts[k].getStringValue()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CTHyperlink[] hls = ps[i].getHyperlinkArray();
|
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
|
||||||
for(CTHyperlink hl : hls) {
|
while(i.hasNext()) {
|
||||||
for(CTR r : hl.getRArray()) {
|
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
|
||||||
for(CTText txt : r.getTArray()) {
|
new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
|
||||||
text.append(txt.getStringValue());
|
text.append(decorator.getText()+"\n");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if(fetchHyperlinks) {
|
|
||||||
String id = hl.getId();
|
|
||||||
if(id != null) {
|
|
||||||
PackageRelationship hlRel =
|
|
||||||
document.getHyperlinks().getRelationshipByID(id);
|
|
||||||
if(hlRel != null) {
|
|
||||||
text.append(" <" + hlRel.getTargetURI().toString() + ">");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// New line after each paragraph.
|
Iterator<XWPFTable> j = document.getTablesIterator();
|
||||||
text.append("\n");
|
while(j.hasNext())
|
||||||
|
{
|
||||||
|
text.append(j.next().getText()+"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.model;
|
||||||
|
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for XWPF paragraphs
|
||||||
|
*
|
||||||
|
* @author Yury Batrakov (batrakov at gmail.com)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class XMLParagraph {
|
||||||
|
protected CTP paragraph;
|
||||||
|
|
||||||
|
public XMLParagraph(CTP paragraph) {
|
||||||
|
this.paragraph = paragraph;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CTP getCTP() {
|
||||||
|
return paragraph;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.model;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFComment;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decorator class for XWPFParagraph allowing to add comments
|
||||||
|
* found in paragraph to its text
|
||||||
|
*
|
||||||
|
* @author Yury Batrakov (batrakov at gmail.com)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
||||||
|
private StringBuffer commentText;
|
||||||
|
|
||||||
|
public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
|
||||||
|
this(nextDecorator.paragraph, nextDecorator);
|
||||||
|
}
|
||||||
|
public XWPFCommentsDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
|
||||||
|
super(paragraph, nextDecorator);
|
||||||
|
|
||||||
|
XWPFComment comment;
|
||||||
|
commentText = new StringBuffer();
|
||||||
|
|
||||||
|
for(CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray())
|
||||||
|
{
|
||||||
|
if((comment = paragraph.getDocRef().getCommentByID(anchor.getId().toString())) != null)
|
||||||
|
commentText.append("\tComment by " + comment.getAuthor()+": "+comment.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText() {
|
||||||
|
return super.getText() + commentText;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,69 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.model;
|
||||||
|
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||||
|
* found in paragraph to its text.
|
||||||
|
*
|
||||||
|
* TODO - add the hyperlink text in the right place, and not just
|
||||||
|
* at the end
|
||||||
|
*/
|
||||||
|
public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
|
||||||
|
private StringBuffer hyperlinkText;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param nextDecorator The next decorator to use
|
||||||
|
* @param outputHyperlinkUrls Should we output the links too, or just the link text?
|
||||||
|
*/
|
||||||
|
public XWPFHyperlinkDecorator(XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
|
||||||
|
this(nextDecorator.paragraph, nextDecorator, outputHyperlinkUrls);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param prgrph The paragraph of text to work on
|
||||||
|
* @param outputHyperlinkUrls Should we output the links too, or just the link text?
|
||||||
|
*/
|
||||||
|
public XWPFHyperlinkDecorator(XWPFParagraph prgrph, XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
|
||||||
|
super(prgrph, nextDecorator);
|
||||||
|
|
||||||
|
hyperlinkText = new StringBuffer();
|
||||||
|
|
||||||
|
// loop over hyperlink anchors
|
||||||
|
for(CTHyperlink link : paragraph.getCTP().getHyperlinkArray()){
|
||||||
|
for (CTR r : link.getRArray()) {
|
||||||
|
// Loop over text runs
|
||||||
|
for (CTText text : r.getTArray()){
|
||||||
|
hyperlinkText.append(text.getStringValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(outputHyperlinkUrls && paragraph.getDocRef().getHyperlinkByID(link.getId()) != null) {
|
||||||
|
hyperlinkText.append(" <"+paragraph.getDocRef().getHyperlinkByID(link.getId()).getURL()+">");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText()
|
||||||
|
{
|
||||||
|
return super.getText() + hyperlinkText;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.model;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base decorator class for XWPFParagraph
|
||||||
|
*/
|
||||||
|
public abstract class XWPFParagraphDecorator {
|
||||||
|
protected XWPFParagraph paragraph;
|
||||||
|
protected XWPFParagraphDecorator nextDecorator;
|
||||||
|
|
||||||
|
public XWPFParagraphDecorator(XWPFParagraph paragraph) {
|
||||||
|
this(paragraph, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFParagraphDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
|
||||||
|
this.paragraph = paragraph;
|
||||||
|
this.nextDecorator = nextDecorator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText() {
|
||||||
|
if(nextDecorator != null) {
|
||||||
|
return nextDecorator.getText();
|
||||||
|
}
|
||||||
|
return paragraph.getText();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sketch of XWPF comment class
|
||||||
|
*
|
||||||
|
* @author Yury Batrakov (batrakov at gmail.com)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class XWPFComment
|
||||||
|
{
|
||||||
|
protected String id;
|
||||||
|
protected String author;
|
||||||
|
protected StringBuffer text;
|
||||||
|
|
||||||
|
public XWPFComment(CTComment comment)
|
||||||
|
{
|
||||||
|
text = new StringBuffer();
|
||||||
|
id = comment.getId().toString();
|
||||||
|
author = comment.getAuthor();
|
||||||
|
|
||||||
|
for(CTP ctp : comment.getPArray())
|
||||||
|
{
|
||||||
|
XWPFParagraph p = new XWPFParagraph(ctp);
|
||||||
|
text.append(p.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getId()
|
||||||
|
{
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthor()
|
||||||
|
{
|
||||||
|
return author;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText()
|
||||||
|
{
|
||||||
|
return text.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
/**
 * Sketch of XWPF hyperlink class: pairs a hyperlink id with the URL
 * it points to.
 *
 * @author Yury Batrakov (batrakov at gmail.com)
 */
public class XWPFHyperlink {
    /** Identifier of the hyperlink. */
    String id;
    /** Target of the hyperlink. */
    String url;

    /**
     * @param id the identifier of the hyperlink
     * @param url the target of the hyperlink
     */
    public XWPFHyperlink(String id, String url) {
        this.url = url;
        this.id = id;
    }

    /**
     * @return the identifier given at construction
     */
    public String getId() {
        return this.id;
    }

    /**
     * @return the target given at construction
     */
    public String getURL() {
        return this.url;
    }
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.model.XMLParagraph;
|
||||||
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sketch of XWPF paragraph class
|
||||||
|
*/
|
||||||
|
public class XWPFParagraph extends XMLParagraph
|
||||||
|
{
|
||||||
|
protected XWPFDocument docRef; // XXX: we'd like to have access to document's hyperlink, comments and other tables
|
||||||
|
/**
|
||||||
|
* TODO - replace with RichText String
|
||||||
|
*/
|
||||||
|
private StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
|
public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
|
||||||
|
{
|
||||||
|
super(prgrph);
|
||||||
|
|
||||||
|
this.docRef = docRef;
|
||||||
|
CTR[] rs = paragraph.getRArray();
|
||||||
|
|
||||||
|
// Get text
|
||||||
|
for (int j = 0; j < rs.length; j++) {
|
||||||
|
// Loop over text runs
|
||||||
|
CTText[] texts = rs[j].getTArray();
|
||||||
|
for (int k = 0; k < texts.length; k++) {
|
||||||
|
text.append(
|
||||||
|
texts[k].getStringValue()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFParagraph(CTP prgrph) {
|
||||||
|
this(prgrph, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFParagraph(XMLParagraph paragraph) {
|
||||||
|
this(paragraph.getCTP());
|
||||||
|
}
|
||||||
|
|
||||||
|
public XWPFDocument getDocRef() {
|
||||||
|
return docRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText() {
|
||||||
|
return text.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
/**
 * Empty placeholder class; it currently declares no members.
 */
public class XWPFParagraphText {
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sketch of XWPFTable class. Only table's text is being hold.
|
||||||
|
*
|
||||||
|
* @author Yury Batrakov (batrakov at gmail.com)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class XWPFTable
|
||||||
|
{
|
||||||
|
protected StringBuffer text=new StringBuffer();
|
||||||
|
|
||||||
|
public XWPFTable(CTTbl table)
|
||||||
|
{
|
||||||
|
for(CTRow row : table.getTrArray())
|
||||||
|
{
|
||||||
|
for(CTTc cell : row.getTcArray())
|
||||||
|
{
|
||||||
|
for(CTP ctp : cell.getPArray())
|
||||||
|
{
|
||||||
|
XWPFParagraph p = new XWPFParagraph(ctp);
|
||||||
|
this.text.append(p.getText()+"\t");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.text.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getText()
|
||||||
|
{
|
||||||
|
return text.toString();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue