Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68
2008-08-05 18:05:29 +00:00 · 2008-08-05 18:05:29 +00:00 · 1559e6cc3c
parent 2126583f46
commit 1559e6cc3c
4 changed files with 54 additions and 15 deletions
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@ -37,6 +37,7 @@

 		<!-- Don't forget to update status.xml too! -->
        <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
           <action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
           <action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
           <action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
           <action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
--- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
@ -16,18 +16,20 @@
 ==================================================================== */
 package org.apache.poi.xslf.extractor;

-import java.io.File;
 import java.io.IOException;

-import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;

 public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
-	private XSLFSlideShow slideshow;
+	private XMLSlideShow slideshow;
 	private boolean slidesByDefault = true;
 	private boolean notesByDefault = false;
 	
+	public XSLFPowerPointExtractor(XMLSlideShow slideshow) { // usermodel-based constructor; the two constructors below chain to it
+		super(slideshow._getXSLFSlideShow()); // the superclass is handed the object returned by _getXSLFSlideShow()
+		this.slideshow = slideshow;
+	}
+	public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException { // wraps the given XSLFSlideShow in a new XMLSlideShow
+		this(new XMLSlideShow(slideshow));
+	}
 	public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { // builds an XSLFSlideShow from the package, then chains to the XSLFSlideShow constructor
 		this(new XSLFSlideShow(container));
 	}
-	public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
-		super(slideshow);
-		this.slideshow = slideshow;
-	}

 	public static void main(String[] args) throws Exception {
 		if(args.length < 1) {
@ -88,18 +93,32 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
 	 */
 	public String getText(boolean slideText, boolean notesText) {
 		StringBuffer text = new StringBuffer();
-		
-		CTSlideIdListEntry[] slideRefs =
-			slideshow.getSlideReferences().getSldIdArray();
-		for (int i = 0; i < slideRefs.length; i++) {
+
+		XSLFSlide[] slides = slideshow.getSlides();
+		for(int i = 0; i < slides.length; i++) {
+			CTSlide rawSlide = slides[i]._getCTSlide();
+			CTSlideIdListEntry slideId = slides[i]._getCTSlideId();
+			
 			try {
-				CTSlide slide =
-					slideshow.getSlide(slideRefs[i]);
+				// For now, still very low level
 				CTNotesSlide notes = 
-					slideshow.getNotes(slideRefs[i]);
+					slideshow._getXSLFSlideShow().getNotes(slideId);
+				CTCommentList comments =
+					slideshow._getXSLFSlideShow().getSlideComments(slideId);
 				
 				if(slideText) {
-					extractText(slide.getCSld().getSpTree(), text);
+					extractText(rawSlide.getCSld().getSpTree(), text);
+					
+					// Comments too for the slide
+					if(comments != null) {
+						for(CTComment comment : comments.getCmArray()) {
+							// TODO - comment authors too
+							// (They're in another stream)
+							text.append(
+									comment.getText() + "\n"
+							);
+						}
+					}
 				}
 				if(notesText && notes != null) {
 					extractText(notes.getCSld().getSpTree(), text);
--- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase {
 				"\n\n\n\n", text
 		);
 	}
+	/**
+	 * Test for bug 45545: extracts the text of the sample file
+	 * 45545_Comment.pptx and checks that the word "testdoc" appears in it.
+	 * The sample file is looked up in the directory named by the
+	 * HSLF.testdata.path system property, and must exist.
+	 */
+	public void testGetComments() throws Exception {
+		File file = new File(
+				System.getProperty("HSLF.testdata.path") +
+				File.separator + "45545_Comment.pptx"
+		);
+		assertTrue(file.exists());
+		// Open the file as an XSLFSlideShow (kept in xmlA, which is declared outside this method) and build the extractor from it
+		xmlA = new XSLFSlideShow(file.toString());
+		XSLFPowerPointExtractor extractor = 
+			new XSLFPowerPointExtractor(xmlA);
+		// Extract with the no-argument getText() and make sure something came back
+		String text = extractor.getText();
+		assertTrue(text.length() > 0);
+		
+		// Check comments are there ("testdoc" is expected to come from a comment in the sample file)
+		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
+	}
 }