mirror of https://github.com/apache/poi.git
Fix bug #45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@682533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fc187db058
commit
eeb7d45566
|
@ -37,6 +37,7 @@
|
||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
||||||
|
|
|
@ -27,6 +27,8 @@ import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.hslf.*;
|
import org.apache.poi.hslf.*;
|
||||||
import org.apache.poi.hslf.model.*;
|
import org.apache.poi.hslf.model.*;
|
||||||
|
import org.apache.poi.hslf.record.Comment2000;
|
||||||
|
import org.apache.poi.hslf.record.Record;
|
||||||
import org.apache.poi.hslf.usermodel.*;
|
import org.apache.poi.hslf.usermodel.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -44,6 +46,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
|
|
||||||
private boolean slidesByDefault = true;
|
private boolean slidesByDefault = true;
|
||||||
private boolean notesByDefault = false;
|
private boolean notesByDefault = false;
|
||||||
|
private boolean commentsByDefault = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic extractor. Returns all the text, and optionally all the notes
|
* Basic extractor. Returns all the text, and optionally all the notes
|
||||||
|
@ -57,16 +60,20 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean notes = false;
|
boolean notes = false;
|
||||||
|
boolean comments = false;
|
||||||
String file;
|
String file;
|
||||||
if(args.length > 1) {
|
if(args.length > 1) {
|
||||||
notes = true;
|
notes = true;
|
||||||
file = args[1];
|
file = args[1];
|
||||||
|
if(args.length > 2) {
|
||||||
|
comments = true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
file = args[0];
|
file = args[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
PowerPointExtractor ppe = new PowerPointExtractor(file);
|
PowerPointExtractor ppe = new PowerPointExtractor(file);
|
||||||
System.out.println(ppe.getText(true,notes));
|
System.out.println(ppe.getText(true,notes,comments));
|
||||||
ppe.close();
|
ppe.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,6 +134,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
public void setNotesByDefault(boolean notesByDefault) {
|
public void setNotesByDefault(boolean notesByDefault) {
|
||||||
this.notesByDefault = notesByDefault;
|
this.notesByDefault = notesByDefault;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Should a call to getText() return comments text?
|
||||||
|
* Default is no
|
||||||
|
*/
|
||||||
|
public void setCommentsByDefault(boolean commentsByDefault) {
|
||||||
|
this.commentsByDefault = commentsByDefault;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetches all the slide text from the slideshow,
|
* Fetches all the slide text from the slideshow,
|
||||||
|
@ -135,7 +149,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
* to change this
|
* to change this
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return getText(slidesByDefault,notesByDefault);
|
return getText(slidesByDefault,notesByDefault,commentsByDefault);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -153,6 +167,9 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
* @param getNoteText fetch note text
|
* @param getNoteText fetch note text
|
||||||
*/
|
*/
|
||||||
public String getText(boolean getSlideText, boolean getNoteText) {
|
public String getText(boolean getSlideText, boolean getNoteText) {
|
||||||
|
return getText(getSlideText, getNoteText, commentsByDefault);
|
||||||
|
}
|
||||||
|
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
|
||||||
StringBuffer ret = new StringBuffer();
|
StringBuffer ret = new StringBuffer();
|
||||||
|
|
||||||
if(getSlideText) {
|
if(getSlideText) {
|
||||||
|
@ -169,6 +186,18 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(getCommentText) {
|
||||||
|
Comment[] comments = slide.getComments();
|
||||||
|
for(int j=0; j<comments.length; j++) {
|
||||||
|
ret.append(
|
||||||
|
comments[j].getAuthor() +
|
||||||
|
" - " +
|
||||||
|
comments[j].getText() +
|
||||||
|
"\n"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if(getNoteText) {
|
if(getNoteText) {
|
||||||
ret.append("\n");
|
ret.append("\n");
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
package org.apache.poi.hslf.model;
|
||||||
|
|
||||||
|
import org.apache.poi.hslf.record.Comment2000;
|
||||||
|
|
||||||
|
public class Comment {
|
||||||
|
private Comment2000 comment2000;
|
||||||
|
|
||||||
|
public Comment(Comment2000 comment2000) {
|
||||||
|
this.comment2000 = comment2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Comment2000 getComment2000() {
|
||||||
|
return comment2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the Author of this comment
|
||||||
|
*/
|
||||||
|
public String getAuthor() {
|
||||||
|
return comment2000.getAuthor();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Set the Author of this comment
|
||||||
|
*/
|
||||||
|
public void setAuthor(String author) {
|
||||||
|
comment2000.setAuthor(author);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the Author's Initials of this comment
|
||||||
|
*/
|
||||||
|
public String getAuthorInitials() {
|
||||||
|
return comment2000.getAuthorInitials();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Set the Author's Initials of this comment
|
||||||
|
*/
|
||||||
|
public void setAuthorInitials(String initials) {
|
||||||
|
comment2000.setAuthorInitials(initials);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the text of this comment
|
||||||
|
*/
|
||||||
|
public String getText() {
|
||||||
|
return comment2000.getText();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Set the text of this comment
|
||||||
|
*/
|
||||||
|
public void setText(String text) {
|
||||||
|
comment2000.setText(text);
|
||||||
|
}
|
||||||
|
}
|
|
@ -362,6 +362,59 @@ public class Slide extends Sheet
|
||||||
return super.getColorScheme();
|
return super.getColorScheme();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the comment(s) for this slide.
|
||||||
|
* Note - for now, only works on PPT 2000 and
|
||||||
|
* PPT 2003 files. Doesn't work for PPT 97
|
||||||
|
* ones, as they do their comments oddly.
|
||||||
|
*/
|
||||||
|
public Comment[] getComments() {
|
||||||
|
// If there are any, they're in
|
||||||
|
// ProgTags -> ProgBinaryTag -> BinaryTagData
|
||||||
|
RecordContainer progTags = (RecordContainer)
|
||||||
|
getSheetContainer().findFirstOfType(
|
||||||
|
RecordTypes.ProgTags.typeID
|
||||||
|
);
|
||||||
|
if(progTags != null) {
|
||||||
|
RecordContainer progBinaryTag = (RecordContainer)
|
||||||
|
progTags.findFirstOfType(
|
||||||
|
RecordTypes.ProgBinaryTag.typeID
|
||||||
|
);
|
||||||
|
if(progBinaryTag != null) {
|
||||||
|
RecordContainer binaryTags = (RecordContainer)
|
||||||
|
progBinaryTag.findFirstOfType(
|
||||||
|
RecordTypes.BinaryTagData.typeID
|
||||||
|
);
|
||||||
|
if(binaryTags != null) {
|
||||||
|
// This is where they'll be
|
||||||
|
int count = 0;
|
||||||
|
for(int i=0; i<binaryTags.getChildRecords().length; i++) {
|
||||||
|
if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now build
|
||||||
|
Comment[] comments = new Comment[count];
|
||||||
|
count = 0;
|
||||||
|
for(int i=0; i<binaryTags.getChildRecords().length; i++) {
|
||||||
|
if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
|
||||||
|
comments[i] = new Comment(
|
||||||
|
(Comment2000)binaryTags.getChildRecords()[i]
|
||||||
|
);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return comments;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// None found
|
||||||
|
return new Comment[0];
|
||||||
|
}
|
||||||
|
|
||||||
public void draw(Graphics2D graphics){
|
public void draw(Graphics2D graphics){
|
||||||
MasterSheet master = getMasterSheet();
|
MasterSheet master = getMasterSheet();
|
||||||
if(getFollowMasterBackground()) master.getBackground().draw(graphics);
|
if(getFollowMasterBackground()) master.getBackground().draw(graphics);
|
||||||
|
|
|
@ -123,6 +123,20 @@ public abstract class RecordContainer extends Record
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the first child record of the given type,
|
||||||
|
* or null if none of the child records are of the
|
||||||
|
* given type. Does not descend.
|
||||||
|
*/
|
||||||
|
public Record findFirstOfType(long type) {
|
||||||
|
for(int i=0; i<_children.length; i++) {
|
||||||
|
if(_children[i].getRecordType() == type) {
|
||||||
|
return _children[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===============================================================
|
/* ===============================================================
|
||||||
* External Move Methods
|
* External Move Methods
|
||||||
* ===============================================================
|
* ===============================================================
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -216,4 +216,36 @@ public class TextExtractor extends TestCase {
|
||||||
ppe.getText(true, false)
|
ppe.getText(true, false)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* From bug #45543
|
||||||
|
*/
|
||||||
|
public void testWithComments() throws Exception {
|
||||||
|
String filename;
|
||||||
|
|
||||||
|
// New file
|
||||||
|
filename = dirname + "/WithComments.ppt";
|
||||||
|
ppe = new PowerPointExtractor(filename);
|
||||||
|
|
||||||
|
String text = ppe.getText();
|
||||||
|
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
||||||
|
|
||||||
|
ppe.setCommentsByDefault(true);
|
||||||
|
|
||||||
|
text = ppe.getText();
|
||||||
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("This is a test comment"));
|
||||||
|
|
||||||
|
|
||||||
|
// And another file
|
||||||
|
filename = dirname + "/45543.ppt";
|
||||||
|
ppe = new PowerPointExtractor(filename);
|
||||||
|
|
||||||
|
text = ppe.getText();
|
||||||
|
assertFalse("Comments not in by default", text.contains("testdoc"));
|
||||||
|
|
||||||
|
ppe.setCommentsByDefault(true);
|
||||||
|
|
||||||
|
text = ppe.getText();
|
||||||
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue