mirror of https://github.com/apache/poi.git
Improved hyperlink and comment fetching for xwpf text extraction, based on the patch from bug #44821
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@651979 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24ba833f03
commit
61405ba81f
|
@ -17,6 +17,9 @@
|
|||
package org.apache.poi.xwpf;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
|
@ -24,12 +27,22 @@ import org.openxml4j.exceptions.InvalidFormatException;
|
|||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxml4j.opc.PackagePart;
|
||||
import org.openxml4j.opc.PackageRelationship;
|
||||
import org.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFComment;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||
|
||||
/**
|
||||
* Experimental class to do low level processing
|
||||
|
@ -49,14 +62,58 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
|
||||
public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
|
||||
public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
|
||||
public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
|
||||
|
||||
private DocumentDocument wordDoc;
|
||||
protected List<XWPFComment> comments;
|
||||
protected List<XWPFHyperlink> hyperlinks;
|
||||
protected List<XWPFParagraph> paragraphs;
|
||||
protected List<XWPFTable> tables;
|
||||
|
||||
public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||
super(container);
|
||||
|
||||
hyperlinks = new LinkedList<XWPFHyperlink>();
|
||||
comments = new LinkedList<XWPFComment>();
|
||||
paragraphs = new LinkedList<XWPFParagraph>();
|
||||
tables= new LinkedList<XWPFTable>();
|
||||
|
||||
wordDoc =
|
||||
DocumentDocument.Factory.parse(getCorePart().getInputStream());
|
||||
|
||||
// filling paragraph list
|
||||
for (CTP p : getDocumentBody().getPArray()) {
|
||||
paragraphs.add(new XWPFParagraph(p, this));
|
||||
}
|
||||
|
||||
// Get the hyperlinks
|
||||
// TODO: make me optional/separated in private function
|
||||
try {
|
||||
Iterator <PackageRelationship> relIter =
|
||||
getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE).iterator();
|
||||
while(relIter.hasNext()) {
|
||||
PackageRelationship rel = relIter.next();
|
||||
hyperlinks.add(new XWPFHyperlink(rel.getId(), rel.getTargetURI().toString()));
|
||||
}
|
||||
} catch(Exception e) {
|
||||
throw new OpenXML4JException(e.getLocalizedMessage());
|
||||
}
|
||||
|
||||
// Get the comments, if there are any
|
||||
PackageRelationshipCollection commentsRel = getCmntRelations();
|
||||
if(commentsRel != null && commentsRel.size() > 0) {
|
||||
PackagePart commentsPart = getTargetPart(commentsRel.getRelationship(0));
|
||||
CommentsDocument cmntdoc = CommentsDocument.Factory.parse(commentsPart.getInputStream());
|
||||
for(CTComment ctcomment : cmntdoc.getComments().getCommentArray())
|
||||
{
|
||||
comments.add(new XWPFComment(ctcomment));
|
||||
}
|
||||
|
||||
for(CTTbl table : getDocumentBody().getTblArray())
|
||||
{
|
||||
tables.add(new XWPFTable(table));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -66,6 +123,42 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
return wordDoc.getDocument();
|
||||
}
|
||||
|
||||
public Iterator<XWPFParagraph> getParagraphsIterator()
|
||||
{
|
||||
return paragraphs.iterator();
|
||||
}
|
||||
|
||||
public Iterator<XWPFTable> getTablesIterator()
|
||||
{
|
||||
return tables.iterator();
|
||||
}
|
||||
|
||||
public XWPFHyperlink getHyperlinkByID(String id)
|
||||
{
|
||||
Iterator<XWPFHyperlink> iter = hyperlinks.iterator();
|
||||
while(iter.hasNext())
|
||||
{
|
||||
XWPFHyperlink link = iter.next();
|
||||
if(link.getId().equals(id))
|
||||
return link;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public XWPFComment getCommentByID(String id)
|
||||
{
|
||||
Iterator<XWPFComment> iter = comments.iterator();
|
||||
while(iter.hasNext())
|
||||
{
|
||||
XWPFComment comment = iter.next();
|
||||
if(comment.getId().equals(id))
|
||||
return comment;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the low level body of the document
|
||||
*/
|
||||
|
@ -92,17 +185,9 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
return sd.getStyles();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all the hyperlink relations for the file.
|
||||
* You'll generally want to get the target to get
|
||||
* the destination of the hyperlink
|
||||
*/
|
||||
public PackageRelationshipCollection getHyperlinks() {
|
||||
try {
|
||||
return getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE);
|
||||
} catch(InvalidFormatException e) {
|
||||
// Should never happen
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
protected PackageRelationshipCollection getCmntRelations() throws InvalidFormatException
|
||||
{
|
||||
return getCorePart().getRelationshipsByType(COMMENT_RELATION_TYPE);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,19 +17,19 @@
|
|||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
|
||||
import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
|
||||
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxml4j.opc.PackageRelationship;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
|
||||
/**
|
||||
* Helper class to extract text from an OOXML Word file
|
||||
|
@ -46,6 +46,15 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||
this.document = document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we also fetch the hyperlinks, when fetching
|
||||
* the text content? Default is to only output the
|
||||
* hyperlink label, and not the contents
|
||||
*/
|
||||
public void setFetchHyperlinks(boolean fetch) {
|
||||
fetchHyperlinks = fetch;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
|
@ -59,56 +68,21 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||
System.out.println(extractor.getText());
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we also fetch the hyperlinks, when fetching
|
||||
* the text content? Default is to only output the
|
||||
* hyperlink label, and not the contents
|
||||
*/
|
||||
public void setFetchHyperlinks(boolean fetch) {
|
||||
fetchHyperlinks = fetch;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
CTBody body = document.getDocumentBody();
|
||||
StringBuffer text = new StringBuffer();
|
||||
|
||||
// Loop over paragraphs
|
||||
CTP[] ps = body.getPArray();
|
||||
for (int i = 0; i < ps.length; i++) {
|
||||
// Loop over ranges and hyperlinks
|
||||
// TODO - properly intersperse ranges and hyperlinks
|
||||
CTR[] rs = ps[i].getRArray();
|
||||
for(int j = 0; j < rs.length; j++) {
|
||||
// Loop over text runs
|
||||
CTText[] texts = rs[j].getTArray();
|
||||
for (int k = 0; k < texts.length; k++) {
|
||||
text.append(
|
||||
texts[k].getStringValue()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
CTHyperlink[] hls = ps[i].getHyperlinkArray();
|
||||
for(CTHyperlink hl : hls) {
|
||||
for(CTR r : hl.getRArray()) {
|
||||
for(CTText txt : r.getTArray()) {
|
||||
text.append(txt.getStringValue());
|
||||
}
|
||||
}
|
||||
if(fetchHyperlinks) {
|
||||
String id = hl.getId();
|
||||
if(id != null) {
|
||||
PackageRelationship hlRel =
|
||||
document.getHyperlinks().getRelationshipByID(id);
|
||||
if(hlRel != null) {
|
||||
text.append(" <" + hlRel.getTargetURI().toString() + ">");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
|
||||
while(i.hasNext()) {
|
||||
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
|
||||
new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
|
||||
text.append(decorator.getText()+"\n");
|
||||
}
|
||||
|
||||
// New line after each paragraph.
|
||||
text.append("\n");
|
||||
Iterator<XWPFTable> j = document.getTablesIterator();
|
||||
while(j.hasNext())
|
||||
{
|
||||
text.append(j.next().getText()+"\n");
|
||||
}
|
||||
|
||||
return text.toString();
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.model;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
|
||||
/**
|
||||
* Base class for XWPF paragraphs
|
||||
*
|
||||
* @author Yury Batrakov (batrakov at gmail.com)
|
||||
*
|
||||
*/
|
||||
public class XMLParagraph {
|
||||
protected CTP paragraph;
|
||||
|
||||
public XMLParagraph(CTP paragraph) {
|
||||
this.paragraph = paragraph;
|
||||
}
|
||||
|
||||
public CTP getCTP() {
|
||||
return paragraph;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.model;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.XWPFComment;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
|
||||
|
||||
/**
|
||||
* Decorator class for XWPFParagraph allowing to add comments
|
||||
* found in paragraph to its text
|
||||
*
|
||||
* @author Yury Batrakov (batrakov at gmail.com)
|
||||
*
|
||||
*/
|
||||
public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
||||
private StringBuffer commentText;
|
||||
|
||||
public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
|
||||
this(nextDecorator.paragraph, nextDecorator);
|
||||
}
|
||||
public XWPFCommentsDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
|
||||
super(paragraph, nextDecorator);
|
||||
|
||||
XWPFComment comment;
|
||||
commentText = new StringBuffer();
|
||||
|
||||
for(CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray())
|
||||
{
|
||||
if((comment = paragraph.getDocRef().getCommentByID(anchor.getId().toString())) != null)
|
||||
commentText.append("\tComment by " + comment.getAuthor()+": "+comment.getText());
|
||||
}
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
return super.getText() + commentText;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.model;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
|
||||
|
||||
/**
|
||||
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||
* found in paragraph to its text.
|
||||
*
|
||||
* TODO - add the hyperlink text in the right place, and not just
|
||||
* at the end
|
||||
*/
|
||||
public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
|
||||
private StringBuffer hyperlinkText;
|
||||
|
||||
/**
|
||||
* @param nextDecorator The next decorator to use
|
||||
* @param outputHyperlinkUrls Should we output the links too, or just the link text?
|
||||
*/
|
||||
public XWPFHyperlinkDecorator(XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
|
||||
this(nextDecorator.paragraph, nextDecorator, outputHyperlinkUrls);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param prgrph The paragraph of text to work on
|
||||
* @param outputHyperlinkUrls Should we output the links too, or just the link text?
|
||||
*/
|
||||
public XWPFHyperlinkDecorator(XWPFParagraph prgrph, XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) {
|
||||
super(prgrph, nextDecorator);
|
||||
|
||||
hyperlinkText = new StringBuffer();
|
||||
|
||||
// loop over hyperlink anchors
|
||||
for(CTHyperlink link : paragraph.getCTP().getHyperlinkArray()){
|
||||
for (CTR r : link.getRArray()) {
|
||||
// Loop over text runs
|
||||
for (CTText text : r.getTArray()){
|
||||
hyperlinkText.append(text.getStringValue());
|
||||
}
|
||||
}
|
||||
if(outputHyperlinkUrls && paragraph.getDocRef().getHyperlinkByID(link.getId()) != null) {
|
||||
hyperlinkText.append(" <"+paragraph.getDocRef().getHyperlinkByID(link.getId()).getURL()+">");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getText()
|
||||
{
|
||||
return super.getText() + hyperlinkText;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.model;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
|
||||
/**
|
||||
* Base decorator class for XWPFParagraph
|
||||
*/
|
||||
public abstract class XWPFParagraphDecorator {
|
||||
protected XWPFParagraph paragraph;
|
||||
protected XWPFParagraphDecorator nextDecorator;
|
||||
|
||||
public XWPFParagraphDecorator(XWPFParagraph paragraph) {
|
||||
this(paragraph, null);
|
||||
}
|
||||
|
||||
public XWPFParagraphDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) {
|
||||
this.paragraph = paragraph;
|
||||
this.nextDecorator = nextDecorator;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
if(nextDecorator != null) {
|
||||
return nextDecorator.getText();
|
||||
}
|
||||
return paragraph.getText();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
|
||||
/**
|
||||
* Sketch of XWPF comment class
|
||||
*
|
||||
* @author Yury Batrakov (batrakov at gmail.com)
|
||||
*
|
||||
*/
|
||||
public class XWPFComment
|
||||
{
|
||||
protected String id;
|
||||
protected String author;
|
||||
protected StringBuffer text;
|
||||
|
||||
public XWPFComment(CTComment comment)
|
||||
{
|
||||
text = new StringBuffer();
|
||||
id = comment.getId().toString();
|
||||
author = comment.getAuthor();
|
||||
|
||||
for(CTP ctp : comment.getPArray())
|
||||
{
|
||||
XWPFParagraph p = new XWPFParagraph(ctp);
|
||||
text.append(p.getText());
|
||||
}
|
||||
}
|
||||
|
||||
public String getId()
|
||||
{
|
||||
return id;
|
||||
}
|
||||
|
||||
public String getAuthor()
|
||||
{
|
||||
return author;
|
||||
}
|
||||
|
||||
public String getText()
|
||||
{
|
||||
return text.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
/**
 * Sketch of XWPF hyperlink class. Holds the ID of a
 *  hyperlink along with the URL it points to.
 *
 * @author Yury Batrakov (batrakov at gmail.com)
 */
public class XWPFHyperlink {
    String id;
    String url;

    /**
     * @param id the ID of the hyperlink
     * @param url the URL of the hyperlink
     */
    public XWPFHyperlink(String id, String url) {
        this.url = url;
        this.id = id;
    }

    /**
     * @return the ID given at construction
     */
    public String getId() {
        return this.id;
    }

    /**
     * @return the URL given at construction
     */
    public String getURL() {
        return this.url;
    }
}
|
|
@ -0,0 +1,70 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.apache.poi.xwpf.model.XMLParagraph;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
|
||||
/**
|
||||
* Sketch of XWPF paragraph class
|
||||
*/
|
||||
public class XWPFParagraph extends XMLParagraph
|
||||
{
|
||||
protected XWPFDocument docRef; // XXX: we'd like to have access to document's hyperlink, comments and other tables
|
||||
/**
|
||||
* TODO - replace with RichText String
|
||||
*/
|
||||
private StringBuffer text = new StringBuffer();
|
||||
|
||||
public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
|
||||
{
|
||||
super(prgrph);
|
||||
|
||||
this.docRef = docRef;
|
||||
CTR[] rs = paragraph.getRArray();
|
||||
|
||||
// Get text
|
||||
for (int j = 0; j < rs.length; j++) {
|
||||
// Loop over text runs
|
||||
CTText[] texts = rs[j].getTArray();
|
||||
for (int k = 0; k < texts.length; k++) {
|
||||
text.append(
|
||||
texts[k].getStringValue()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public XWPFParagraph(CTP prgrph) {
|
||||
this(prgrph, null);
|
||||
}
|
||||
|
||||
public XWPFParagraph(XMLParagraph paragraph) {
|
||||
this(paragraph.getCTP());
|
||||
}
|
||||
|
||||
public XWPFDocument getDocRef() {
|
||||
return docRef;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
return text.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
/**
 * Empty placeholder class, which currently declares
 *  no fields and no methods.
 */
public class XWPFParagraphText {
}
|
|
@ -0,0 +1,55 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
|
||||
/**
|
||||
* Sketch of XWPFTable class. Only table's text is being hold.
|
||||
*
|
||||
* @author Yury Batrakov (batrakov at gmail.com)
|
||||
*
|
||||
*/
|
||||
public class XWPFTable
|
||||
{
|
||||
protected StringBuffer text=new StringBuffer();
|
||||
|
||||
public XWPFTable(CTTbl table)
|
||||
{
|
||||
for(CTRow row : table.getTrArray())
|
||||
{
|
||||
for(CTTc cell : row.getTcArray())
|
||||
{
|
||||
for(CTP ctp : cell.getPArray())
|
||||
{
|
||||
XWPFParagraph p = new XWPFParagraph(ctp);
|
||||
this.text.append(p.getText()+"\t");
|
||||
}
|
||||
}
|
||||
this.text.append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
public String getText()
|
||||
{
|
||||
return text.toString();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue