Fix bug #45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@682533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-04 22:48:39 +00:00
parent fc187db058
commit eeb7d45566
9 changed files with 186 additions and 2 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action> <action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action> <action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action> <action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action> <action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action> <action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action> <action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>

View File

@ -27,6 +27,8 @@ import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hslf.*; import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*; import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.record.Comment2000;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.usermodel.*; import org.apache.poi.hslf.usermodel.*;
/** /**
@ -44,6 +46,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
private boolean slidesByDefault = true; private boolean slidesByDefault = true;
private boolean notesByDefault = false; private boolean notesByDefault = false;
private boolean commentsByDefault = false;
/** /**
* Basic extractor. Returns all the text, and optionally all the notes * Basic extractor. Returns all the text, and optionally all the notes
@ -57,16 +60,20 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
} }
boolean notes = false; boolean notes = false;
boolean comments = false;
String file; String file;
if(args.length > 1) { if(args.length > 1) {
notes = true; notes = true;
file = args[1]; file = args[1];
if(args.length > 2) {
comments = true;
}
} else { } else {
file = args[0]; file = args[0];
} }
PowerPointExtractor ppe = new PowerPointExtractor(file); PowerPointExtractor ppe = new PowerPointExtractor(file);
System.out.println(ppe.getText(true,notes)); System.out.println(ppe.getText(true,notes,comments));
ppe.close(); ppe.close();
} }
@ -127,6 +134,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
public void setNotesByDefault(boolean notesByDefault) { public void setNotesByDefault(boolean notesByDefault) {
this.notesByDefault = notesByDefault; this.notesByDefault = notesByDefault;
} }
/**
 * Controls whether the no-argument getText() includes
 *  slide comment text in what it returns.
 * Comments are left out unless this is switched on.
 *
 * @param commentsByDefault true to have getText() include comments
 */
public void setCommentsByDefault(boolean commentsByDefault) {
    this.commentsByDefault = commentsByDefault;
}
/** /**
* Fetches all the slide text from the slideshow, * Fetches all the slide text from the slideshow,
@ -135,7 +149,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
* to change this * to change this
*/ */
public String getText() { public String getText() {
return getText(slidesByDefault,notesByDefault); return getText(slidesByDefault,notesByDefault,commentsByDefault);
} }
/** /**
@ -153,6 +167,9 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
* @param getNoteText fetch note text * @param getNoteText fetch note text
*/ */
public String getText(boolean getSlideText, boolean getNoteText) { public String getText(boolean getSlideText, boolean getNoteText) {
return getText(getSlideText, getNoteText, commentsByDefault);
}
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
StringBuffer ret = new StringBuffer(); StringBuffer ret = new StringBuffer();
if(getSlideText) { if(getSlideText) {
@ -169,6 +186,18 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
} }
} }
} }
if(getCommentText) {
Comment[] comments = slide.getComments();
for(int j=0; j<comments.length; j++) {
ret.append(
comments[j].getAuthor() +
" - " +
comments[j].getText() +
"\n"
);
}
}
} }
if(getNoteText) { if(getNoteText) {
ret.append("\n"); ret.append("\n");

View File

@ -0,0 +1,54 @@
package org.apache.poi.hslf.model;
import org.apache.poi.hslf.record.Comment2000;
/**
 * Model-level view of a single slide comment.
 * Every accessor simply forwards to the Comment2000
 *  record that was supplied at construction time.
 */
public class Comment {
    /** The record all calls are delegated to */
    private Comment2000 record;

    /**
     * Wraps the given record.
     *
     * @param comment2000 the record holding the comment's data
     */
    public Comment(Comment2000 comment2000) {
        record = comment2000;
    }

    /**
     * @return the record this comment was built from
     */
    protected Comment2000 getComment2000() {
        return record;
    }

    /**
     * @return the author, as reported by the wrapped record
     */
    public String getAuthor() {
        return record.getAuthor();
    }

    /**
     * Passes the new author on to the wrapped record.
     *
     * @param author the author to store
     */
    public void setAuthor(String author) {
        record.setAuthor(author);
    }

    /**
     * @return the author's initials, as reported by the wrapped record
     */
    public String getAuthorInitials() {
        return record.getAuthorInitials();
    }

    /**
     * Passes the new initials on to the wrapped record.
     *
     * @param initials the author's initials to store
     */
    public void setAuthorInitials(String initials) {
        record.setAuthorInitials(initials);
    }

    /**
     * @return the comment text, as reported by the wrapped record
     */
    public String getText() {
        return record.getText();
    }

    /**
     * Passes the new text on to the wrapped record.
     *
     * @param text the comment text to store
     */
    public void setText(String text) {
        record.setText(text);
    }
}

View File

@ -361,6 +361,59 @@ public class Slide extends Sheet
} }
return super.getColorScheme(); return super.getColorScheme();
} }
/**
 * Get the comment(s) for this slide.
 * Note - for now, only works on PPT 2000 and
 *  PPT 2003 files. Doesn't work for PPT 97
 *  ones, as they do their comments oddly.
 *
 * @return one Comment for each Comment2000 child record found,
 *  in the order the records occur, or an empty array (never
 *  null) if any container on the path is missing
 */
public Comment[] getComments() {
    // If there are any, they're in
    //  ProgTags -> ProgBinaryTag -> BinaryTagData
    RecordContainer progTags = (RecordContainer)
        getSheetContainer().findFirstOfType(
                RecordTypes.ProgTags.typeID
        );
    if(progTags == null) {
        return new Comment[0];
    }

    RecordContainer progBinaryTag = (RecordContainer)
        progTags.findFirstOfType(
                RecordTypes.ProgBinaryTag.typeID
        );
    if(progBinaryTag == null) {
        return new Comment[0];
    }

    RecordContainer binaryTags = (RecordContainer)
        progBinaryTag.findFirstOfType(
                RecordTypes.BinaryTagData.typeID
        );
    if(binaryTags == null) {
        return new Comment[0];
    }

    // First pass - size the result by counting only the
    //  Comment2000 children; other record types may be mixed in
    int count = 0;
    for(int i=0; i<binaryTags.getChildRecords().length; i++) {
        if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
            count++;
        }
    }

    // Second pass - fill the result. The destination slot is
    //  the number of comments stored so far, NOT the child
    //  index: using the child index would run past the end of
    //  the array (or leave null gaps) whenever a non-comment
    //  record sits before or between the comments.
    Comment[] comments = new Comment[count];
    int stored = 0;
    for(int i=0; i<binaryTags.getChildRecords().length; i++) {
        if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
            comments[stored] = new Comment(
                    (Comment2000)binaryTags.getChildRecords()[i]
            );
            stored++;
        }
    }
    return comments;
}
public void draw(Graphics2D graphics){ public void draw(Graphics2D graphics){
MasterSheet master = getMasterSheet(); MasterSheet master = getMasterSheet();

View File

@ -123,6 +123,20 @@ public abstract class RecordContainer extends Record
} }
/**
 * Looks through the direct children only (no recursion
 *  into nested containers) for a record whose type
 *  matches the one requested.
 *
 * @param type the record type to look for
 * @return the earliest matching child, or null when no
 *  child has that type
 */
public Record findFirstOfType(long type) {
    Record match = null;
    for(int idx=0; match == null && idx<_children.length; idx++) {
        Record candidate = _children[idx];
        if(candidate.getRecordType() == type) {
            match = candidate;
        }
    }
    return match;
}
/* =============================================================== /* ===============================================================
* External Move Methods * External Move Methods
* =============================================================== * ===============================================================

View File

@ -216,4 +216,36 @@ public class TextExtractor extends TestCase {
ppe.getText(true, false) ppe.getText(true, false)
); );
} }
/**
 * Regression check for bug #45543: comment text must be
 *  absent from the default output, and present once
 *  comments have been switched on.
 */
public void testWithComments() throws Exception {
    // New file
    assertCommentsOnlyWhenEnabled(dirname + "/WithComments.ppt", "This is a test comment");

    // And another file
    assertCommentsOnlyWhenEnabled(dirname + "/45543.ppt", "testdoc");
}

/**
 * Opens the given file, checks the marker text is missing
 *  from the default extraction, then enables comments and
 *  checks the marker text shows up.
 */
private void assertCommentsOnlyWhenEnabled(String path, String marker) throws Exception {
    ppe = new PowerPointExtractor(path);

    String defaultText = ppe.getText();
    assertFalse("Comments not in by default", defaultText.contains(marker));

    ppe.setCommentsByDefault(true);

    String commentedText = ppe.getText();
    assertTrue("Unable to find expected word in text\n" + commentedText, commentedText.contains(marker));
}
} }