mirror of https://github.com/apache/poi.git
Tweak HSLF and HWPF to work well with NPOIFS, and add unit tests for this
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1054191 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae43d980c5
commit
a7a35871db
|
@ -25,7 +25,6 @@ import org.apache.poi.hslf.record.DocumentEncryptionAtom;
|
||||||
import org.apache.poi.hslf.record.PersistPtrHolder;
|
import org.apache.poi.hslf.record.PersistPtrHolder;
|
||||||
import org.apache.poi.hslf.record.Record;
|
import org.apache.poi.hslf.record.Record;
|
||||||
import org.apache.poi.hslf.record.UserEditAtom;
|
import org.apache.poi.hslf.record.UserEditAtom;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides helper functions for determining if a
|
* This class provides helper functions for determining if a
|
||||||
|
@ -48,9 +47,8 @@ public final class EncryptedSlideShow
|
||||||
public static boolean checkIfEncrypted(HSLFSlideShow hss) {
|
public static boolean checkIfEncrypted(HSLFSlideShow hss) {
|
||||||
// Easy way to check - contains a stream
|
// Easy way to check - contains a stream
|
||||||
// "EncryptedSummary"
|
// "EncryptedSummary"
|
||||||
POIFSFileSystem fs = hss.getPOIFSFileSystem();
|
|
||||||
try {
|
try {
|
||||||
fs.getRoot().getEntry("EncryptedSummary");
|
hss.getPOIFSDirectory().getEntry("EncryptedSummary");
|
||||||
return true;
|
return true;
|
||||||
} catch(FileNotFoundException fnfe) {
|
} catch(FileNotFoundException fnfe) {
|
||||||
// Doesn't have encrypted properties
|
// Doesn't have encrypted properties
|
||||||
|
|
|
@ -84,6 +84,14 @@ public final class HSLFSlideShow extends POIDocument {
|
||||||
return directory.getFileSystem();
|
return directory.getFileSystem();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the directory in the underlying POIFSFileSystem for the
|
||||||
|
* document that is open.
|
||||||
|
*/
|
||||||
|
protected DirectoryNode getPOIFSDirectory() {
|
||||||
|
return directory;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a PowerPoint document from fileName. Parses the document
|
* Constructs a PowerPoint document from fileName. Parses the document
|
||||||
* and places all the important stuff into data structures.
|
* and places all the important stuff into data structures.
|
||||||
|
|
|
@ -17,20 +17,20 @@
|
||||||
|
|
||||||
package org.apache.poi.hslf.extractor;
|
package org.apache.poi.hslf.extractor;
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.hslf.HSLFSlideShow;
|
import org.apache.poi.hslf.HSLFSlideShow;
|
||||||
import org.apache.poi.hslf.model.OLEShape;
|
import org.apache.poi.hslf.model.OLEShape;
|
||||||
import org.apache.poi.hslf.usermodel.SlideShow;
|
import org.apache.poi.hslf.usermodel.SlideShow;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|
||||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import junit.framework.TestCase;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that the extractor correctly gets the text out of our sample file
|
* Tests that the extractor correctly gets the text out of our sample file
|
||||||
|
@ -40,8 +40,13 @@ import junit.framework.TestCase;
|
||||||
public final class TestExtractor extends TestCase {
|
public final class TestExtractor extends TestCase {
|
||||||
/** Extractor primed on the 2 page basic test data */
|
/** Extractor primed on the 2 page basic test data */
|
||||||
private PowerPointExtractor ppe;
|
private PowerPointExtractor ppe;
|
||||||
|
private static final String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
|
||||||
|
|
||||||
/** Extractor primed on the 1 page but text-box'd test data */
|
/** Extractor primed on the 1 page but text-box'd test data */
|
||||||
private PowerPointExtractor ppe2;
|
private PowerPointExtractor ppe2;
|
||||||
|
private static final String expectText2 = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
|
||||||
|
|
||||||
|
|
||||||
/** Where our embedded files live */
|
/** Where our embedded files live */
|
||||||
//private String pdirname;
|
//private String pdirname;
|
||||||
private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
|
private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
|
||||||
|
@ -55,16 +60,14 @@ public final class TestExtractor extends TestCase {
|
||||||
public void testReadSheetText() {
|
public void testReadSheetText() {
|
||||||
// Basic 2 page example
|
// Basic 2 page example
|
||||||
String sheetText = ppe.getText();
|
String sheetText = ppe.getText();
|
||||||
String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
|
|
||||||
|
|
||||||
ensureTwoStringsTheSame(expectText, sheetText);
|
ensureTwoStringsTheSame(expectText, sheetText);
|
||||||
|
|
||||||
|
|
||||||
// 1 page example with text boxes
|
// 1 page example with text boxes
|
||||||
sheetText = ppe2.getText();
|
sheetText = ppe2.getText();
|
||||||
expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
|
|
||||||
|
|
||||||
ensureTwoStringsTheSame(expectText, sheetText);
|
ensureTwoStringsTheSame(expectText2, sheetText);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testReadNoteText() {
|
public void testReadNoteText() {
|
||||||
|
@ -273,4 +276,28 @@ public final class TestExtractor extends TestCase {
|
||||||
assertTrue(text.contains("Master Header Text"));
|
assertTrue(text.contains("Master Header Text"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that we can work with both {@link POIFSFileSystem}
|
||||||
|
* and {@link NPOIFSFileSystem}
|
||||||
|
*/
|
||||||
|
public void testDifferentPOIFS() throws Exception {
|
||||||
|
// Open the two filesystems
|
||||||
|
DirectoryNode[] files = new DirectoryNode[2];
|
||||||
|
files[0] = (new POIFSFileSystem(slTests.openResourceAsStream("basic_test_ppt_file.ppt"))).getRoot();
|
||||||
|
files[1] = (new NPOIFSFileSystem(slTests.getFile("basic_test_ppt_file.ppt"))).getRoot();
|
||||||
|
|
||||||
|
// Open directly
|
||||||
|
for(DirectoryNode dir : files) {
|
||||||
|
PowerPointExtractor extractor = new PowerPointExtractor(dir, null);
|
||||||
|
assertEquals(expectText, extractor.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open via an HSLFSlideShow
|
||||||
|
for(DirectoryNode dir : files) {
|
||||||
|
HSLFSlideShow slideshow = new HSLFSlideShow(dir);
|
||||||
|
PowerPointExtractor extractor = new PowerPointExtractor(slideshow);
|
||||||
|
assertEquals(expectText, extractor.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -314,4 +315,30 @@ public final class TestWordExtractor extends TestCase {
|
||||||
|
|
||||||
assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
|
assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that we can work with both {@link POIFSFileSystem}
|
||||||
|
* and {@link NPOIFSFileSystem}
|
||||||
|
*/
|
||||||
|
public void testDifferentPOIFS() throws Exception {
|
||||||
|
POIDataSamples docTests = POIDataSamples.getDocumentInstance();
|
||||||
|
|
||||||
|
// Open the two filesystems
|
||||||
|
DirectoryNode[] files = new DirectoryNode[2];
|
||||||
|
files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot();
|
||||||
|
files[1] = (new NPOIFSFileSystem(docTests.getFile("test2.doc"))).getRoot();
|
||||||
|
|
||||||
|
// Open directly
|
||||||
|
for(DirectoryNode dir : files) {
|
||||||
|
WordExtractor extractor = new WordExtractor(dir, null);
|
||||||
|
assertEquals(p_text1_block, extractor.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open via a HWPFDocument
|
||||||
|
for(DirectoryNode dir : files) {
|
||||||
|
HWPFDocument doc = new HWPFDocument(dir);
|
||||||
|
WordExtractor extractor = new WordExtractor(doc);
|
||||||
|
assertEquals(p_text1_block, extractor.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue