mirror of https://github.com/apache/poi.git
Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2126583f46
commit
1559e6cc3c
|
@ -37,6 +37,7 @@
|
|||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.5.1-beta2" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by Excel without warning</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and from which the HSSF version inherits</action>
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.5.1-beta2" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by Excel without warning</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and from which the HSSF version inherits</action>
|
||||
|
|
|
@ -16,18 +16,20 @@
|
|||
==================================================================== */
|
||||
package org.apache.poi.xslf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
|
||||
|
@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
|
|||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
|
||||
|
||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||
private XSLFSlideShow slideshow;
|
||||
private XMLSlideShow slideshow;
|
||||
private boolean slidesByDefault = true;
|
||||
private boolean notesByDefault = false;
|
||||
|
||||
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
|
||||
super(slideshow._getXSLFSlideShow());
|
||||
this.slideshow = slideshow;
|
||||
}
|
||||
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException {
|
||||
this(new XMLSlideShow(slideshow));
|
||||
}
|
||||
public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new XSLFSlideShow(container));
|
||||
}
|
||||
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
|
||||
super(slideshow);
|
||||
this.slideshow = slideshow;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 1) {
|
||||
|
@ -88,18 +93,32 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
|||
*/
|
||||
public String getText(boolean slideText, boolean notesText) {
|
||||
StringBuffer text = new StringBuffer();
|
||||
|
||||
CTSlideIdListEntry[] slideRefs =
|
||||
slideshow.getSlideReferences().getSldIdArray();
|
||||
for (int i = 0; i < slideRefs.length; i++) {
|
||||
|
||||
XSLFSlide[] slides = slideshow.getSlides();
|
||||
for(int i = 0; i < slides.length; i++) {
|
||||
CTSlide rawSlide = slides[i]._getCTSlide();
|
||||
CTSlideIdListEntry slideId = slides[i]._getCTSlideId();
|
||||
|
||||
try {
|
||||
CTSlide slide =
|
||||
slideshow.getSlide(slideRefs[i]);
|
||||
// For now, still very low level
|
||||
CTNotesSlide notes =
|
||||
slideshow.getNotes(slideRefs[i]);
|
||||
slideshow._getXSLFSlideShow().getNotes(slideId);
|
||||
CTCommentList comments =
|
||||
slideshow._getXSLFSlideShow().getSlideComments(slideId);
|
||||
|
||||
if(slideText) {
|
||||
extractText(slide.getCSld().getSpTree(), text);
|
||||
extractText(rawSlide.getCSld().getSpTree(), text);
|
||||
|
||||
// Comments too for the slide
|
||||
if(comments != null) {
|
||||
for(CTComment comment : comments.getCmArray()) {
|
||||
// TODO - comment authors too
|
||||
// (They're in another stream)
|
||||
text.append(
|
||||
comment.getText() + "\n"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(notesText && notes != null) {
|
||||
extractText(notes.getCSld().getSpTree(), text);
|
||||
|
|
|
@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase {
|
|||
"\n\n\n\n", text
|
||||
);
|
||||
}
|
||||
|
||||
public void testGetComments() throws Exception {
|
||||
File file = new File(
|
||||
System.getProperty("HSLF.testdata.path") +
|
||||
File.separator + "45545_Comment.pptx"
|
||||
);
|
||||
assertTrue(file.exists());
|
||||
|
||||
xmlA = new XSLFSlideShow(file.toString());
|
||||
XSLFPowerPointExtractor extractor =
|
||||
new XSLFPowerPointExtractor(xmlA);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check comments are there
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue