Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-66
0255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682532,682534-682619,682622-682777,682779-683020 via svnmerge from

https://svn.apache.org/repos/asf/poi/trunk

........
  r682999 | nick | 2008-08-05 23:29:20 +0100 (Tue, 05 Aug 2008) | 1 line
  
  More tests for bug #45365, but still not able to reproduce it
........
  r683020 | nick | 2008-08-05 23:49:24 +0100 (Tue, 05 Aug 2008) | 1 line
  
  With help from Yegor, fix bug #45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF
........


git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@683024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-05 23:05:00 +00:00
parent 1559e6cc3c
commit 6d58699317
8 changed files with 80 additions and 33 deletions

View File

@ -54,6 +54,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of whether they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -51,6 +51,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of whether they are .xls or .xlsx</action>
</release>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -16,11 +16,7 @@
==================================================================== */
package org.apache.poi.hssf.eventusermodel;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
@ -33,7 +29,6 @@ import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
/**
* A proxy HSSFListener that keeps track of the document
@ -50,6 +45,13 @@ public class FormatTrackingHSSFListener implements HSSFListener {
this.childListener = childListener;
}
/**
 * Reports how many entries are currently held in
 * {@code customFormatRecords}.
 *
 * @return the current size of {@code customFormatRecords}
 */
protected int getNumberOfCustomFormats() {
    final int customFormatCount = customFormatRecords.size();
    return customFormatCount;
}
/**
 * Reports how many entries are currently held in
 * {@code xfRecords}.
 *
 * @return the current size of {@code xfRecords}
 */
protected int getNumberOfExtendedFormats() {
    final int extendedFormatCount = xfRecords.size();
    return extendedFormatCount;
}
/**
* Process this record ourselves, and then
* pass it on to our child listener

View File

@ -176,11 +176,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
for(int i=0; i<_slides.length; i++) {
Slide slide = _slides[i];
// Slide header, if set
HeadersFooters hf = slide.getHeadersFooters();
if(hf != null && hf.getHeaderText() != null) {
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n");
}
// Slide text
TextRun[] runs = slide.getTextRuns();
for(int j=0; j<runs.length; j++) {
TextRun run = runs[j];
@ -193,10 +195,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
}
}
if(hf != null && hf.getFooterText() != null) {
// Slide footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
// Comments, if requested and present
if(getCommentText) {
Comment[] comments = slide.getComments();
for(int j=0; j<comments.length; j++) {
@ -219,6 +223,8 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
// master sheets in. Grab Slide list, then work from there,
// but ensure no duplicates
HashSet seenNotes = new HashSet();
HeadersFooters hf = _show.getNotesHeadersFooters();
for(int i=0; i<_slides.length; i++) {
Notes notes = _slides[i].getNotesSheet();
if(notes == null) { continue; }
@ -226,6 +232,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
if(seenNotes.contains(id)) { continue; }
seenNotes.add(id);
// Repeat the Notes header, if set
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n");
}
// Notes text
TextRun[] runs = notes.getTextRuns();
if(runs != null && runs.length > 0) {
for(int j=0; j<runs.length; j++) {
@ -237,6 +249,11 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
}
}
}
// Repeat the notes footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
}
}

View File

@ -23,6 +23,8 @@ import org.apache.poi.hslf.usermodel.SlideShow;
/**
* Header / Footer settings.
*
* You can get these on slides, or across all notes
*
* @author Yegor Kozlov
*/
public class HeadersFooters {

View File

@ -253,32 +253,42 @@ public class TextExtractor extends TestCase {
/**
* From bug #45537
*/
public void DISABLEDtestHeaderFooter() throws Exception {
public void testHeaderFooter() throws Exception {
String filename, text;
// With a header
// With a header on the notes
filename = dirname + "/45537_Header.ppt";
HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename));
SlideShow ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getHeaderText());
assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText());
ppe = new PowerPointExtractor(hslf);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// And with a footer
// And with a footer, also on notes
filename = dirname + "/45537_Footer.ppt";
hslf = new HSLFSlideShow(new FileInputStream(filename));
ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getFooterText());
assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText());
ppe = new PowerPointExtractor(filename);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));

Binary file not shown.

View File

@ -62,31 +62,45 @@ public final class TestFormatTrackingHSSFListener extends TestCase {
/**
* Ensure that all number and formula records can be
* turned into strings without problems
* turned into strings without problems.
* For now, we're just looking to get text back, no
* exceptions thrown, but in future we might also
* want to check the exact strings!
*/
public void testTurnToString() throws Exception {
processFile("45365.xls");
String[] files = new String[] {
"45365.xls", "45365-2.xls", "MissingBits.xls"
};
for(int k=0; k<files.length; k++) {
processFile(files[k]);
for(int i=0; i<mockListen._records.size(); i++) {
Record r = (Record)mockListen._records.get(i);
CellValueRecordInterface cvr = null;
// Check we found our formats
assertTrue(listener.getNumberOfCustomFormats() > 5);
assertTrue(listener.getNumberOfExtendedFormats() > 5);
if(r instanceof NumberRecord) {
cvr = (CellValueRecordInterface)r;
}
if(r instanceof FormulaRecord) {
cvr = (CellValueRecordInterface)r;
// Now check we can turn all the numeric
// cells into strings without error
for(int i=0; i<mockListen._records.size(); i++) {
Record r = (Record)mockListen._records.get(i);
CellValueRecordInterface cvr = null;
if(r instanceof NumberRecord) {
cvr = (CellValueRecordInterface)r;
}
if(r instanceof FormulaRecord) {
cvr = (CellValueRecordInterface)r;
}
if(cvr != null) {
// Should always give us a string
String s = listener.formatNumberDateCell(cvr);
assertNotNull(s);
assertTrue(s.length() > 0);
}
}
if(cvr != null) {
// Should always give us a string
String s = listener.formatNumberDateCell(cvr);
assertNotNull(s);
assertTrue(s.length() > 0);
}
// TODO - test some specific format strings
}
// TODO - test some specific format strings
}
private static final class MockHSSFListener implements HSSFListener {