mirror of https://github.com/apache/poi.git
Update the XSLF slide master text extraction to be optional, as HSLF already is, plus fix some indents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1173756 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3f43242f1a
commit
d12375b4c0
|
@ -34,7 +34,7 @@
|
||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.8-beta5" date="2011-??-??">
|
<release version="3.8-beta5" date="2011-??-??">
|
||||||
<action dev="poi-developers" type="add">51804 - include Master Slide text in XSLF text extraction</action>
|
<action dev="poi-developers" type="add">51804 - optionally include Master Slide text in XSLF text extraction, as HSLF already offers</action>
|
||||||
<action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action>
|
<action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action>
|
||||||
<action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action>
|
<action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action>
|
||||||
<action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action>
|
<action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action>
|
||||||
|
|
|
@ -45,6 +45,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
private XMLSlideShow slideshow;
|
private XMLSlideShow slideshow;
|
||||||
private boolean slidesByDefault = true;
|
private boolean slidesByDefault = true;
|
||||||
private boolean notesByDefault = false;
|
private boolean notesByDefault = false;
|
||||||
|
private boolean masterByDefault = false;
|
||||||
|
|
||||||
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
|
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
|
||||||
super(slideshow);
|
super(slideshow);
|
||||||
|
@ -84,6 +85,13 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
this.notesByDefault = notesByDefault;
|
this.notesByDefault = notesByDefault;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should a call to getText() return text from the master slides? Default is no.
|
||||||
|
*/
|
||||||
|
public void setMasterByDefault(boolean masterByDefault) {
|
||||||
|
this.masterByDefault = masterByDefault;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the slide text, but not the notes text
|
* Gets the slide text, but not the notes text
|
||||||
*/
|
*/
|
||||||
|
@ -97,6 +105,16 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
* @param notesText Should we retrieve text from notes?
|
* @param notesText Should we retrieve text from notes?
|
||||||
*/
|
*/
|
||||||
public String getText(boolean slideText, boolean notesText) {
|
public String getText(boolean slideText, boolean notesText) {
|
||||||
|
return getText(slideText, notesText, masterByDefault);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the requested text from the file
|
||||||
|
* @param slideText Should we retrieve text from slides?
|
||||||
|
* @param notesText Should we retrieve text from notes?
|
||||||
|
* @param masterText Should we retrieve text from master slides?
|
||||||
|
*/
|
||||||
|
public String getText(boolean slideText, boolean notesText, boolean masterText) {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
XSLFSlide[] slides = slideshow.getSlides();
|
XSLFSlide[] slides = slideshow.getSlides();
|
||||||
|
@ -115,8 +133,8 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
if (slideText) {
|
if (slideText) {
|
||||||
extractText(slide.getCommonSlideData(), text);
|
extractText(slide.getCommonSlideData(), text);
|
||||||
|
|
||||||
// If there's a master sheet, grab text from there
|
// If there's a master sheet and it's requested, grab text from there
|
||||||
if(master != null) {
|
if(masterText && master != null) {
|
||||||
extractText(master.getCommonSlideData(), text);
|
extractText(master.getCommonSlideData(), text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -68,22 +68,18 @@ public class TestXSLFPowerPointExtractor extends TestCase {
|
||||||
"Fifth level\n";
|
"Fifth level\n";
|
||||||
|
|
||||||
// Just slides, no notes
|
// Just slides, no notes
|
||||||
text = extractor.getText(true, false);
|
text = extractor.getText(true, false, false);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
"\n" +
|
"\n" +
|
||||||
masterText +
|
|
||||||
"\n\n\n" +
|
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Lorem\n" +
|
"Lorem\n" +
|
||||||
"ipsum\n" +
|
"ipsum\n" +
|
||||||
"dolor\n" +
|
"dolor\n" +
|
||||||
"sit\n" +
|
"sit\n" +
|
||||||
"amet\n" +
|
"amet\n" +
|
||||||
"\n" +
|
"\n"
|
||||||
masterText +
|
|
||||||
"\n\n\n"
|
|
||||||
, text
|
, text
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -94,25 +90,61 @@ public class TestXSLFPowerPointExtractor extends TestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
// Both
|
// Both
|
||||||
text = extractor.getText(true, true);
|
text = extractor.getText(true, true, false);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
"\n" +
|
"\n\n\n" +
|
||||||
masterText +
|
|
||||||
"\n\n\n\n\n" +
|
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Lorem\n" +
|
"Lorem\n" +
|
||||||
"ipsum\n" +
|
"ipsum\n" +
|
||||||
"dolor\n" +
|
"dolor\n" +
|
||||||
"sit\n" +
|
"sit\n" +
|
||||||
"amet\n" +
|
"amet\n" +
|
||||||
"\n" +
|
"\n\n\n"
|
||||||
masterText +
|
|
||||||
"\n\n\n\n\n"
|
|
||||||
, text
|
, text
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// With Slides and Master Text
|
||||||
|
text = extractor.getText(true, false, true);
|
||||||
|
assertEquals(
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
|
"\n" +
|
||||||
|
masterText +
|
||||||
|
"\n\n\n" +
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Lorem\n" +
|
||||||
|
"ipsum\n" +
|
||||||
|
"dolor\n" +
|
||||||
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n" +
|
||||||
|
masterText +
|
||||||
|
"\n\n\n"
|
||||||
|
, text
|
||||||
|
);
|
||||||
|
|
||||||
|
// With Slides, Notes and Master Text
|
||||||
|
text = extractor.getText(true, true, true);
|
||||||
|
assertEquals(
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
|
"\n" +
|
||||||
|
masterText +
|
||||||
|
"\n\n\n\n\n" +
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Lorem\n" +
|
||||||
|
"ipsum\n" +
|
||||||
|
"dolor\n" +
|
||||||
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n" +
|
||||||
|
masterText +
|
||||||
|
"\n\n\n\n\n"
|
||||||
|
, text
|
||||||
|
);
|
||||||
|
|
||||||
// Via set defaults
|
// Via set defaults
|
||||||
extractor.setSlidesByDefault(false);
|
extractor.setSlidesByDefault(false);
|
||||||
extractor.setNotesByDefault(true);
|
extractor.setNotesByDefault(true);
|
||||||
|
|
|
@ -39,14 +39,14 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
*/
|
*/
|
||||||
public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
||||||
private HSLFSlideShow _hslfshow;
|
private HSLFSlideShow _hslfshow;
|
||||||
private SlideShow _show;
|
private SlideShow _show;
|
||||||
private Slide[] _slides;
|
private Slide[] _slides;
|
||||||
|
|
||||||
private boolean _slidesByDefault = true;
|
private boolean _slidesByDefault = true;
|
||||||
private boolean _notesByDefault = false;
|
private boolean _notesByDefault = false;
|
||||||
private boolean _commentsByDefault = false;
|
private boolean _commentsByDefault = false;
|
||||||
private boolean _masterByDefault = false;
|
private boolean _masterByDefault = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic extractor. Returns all the text, and optionally all the notes
|
* Basic extractor. Returns all the text, and optionally all the notes
|
||||||
|
|
Loading…
Reference in New Issue