mirror of https://github.com/apache/poi.git
Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded Word document
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@646870 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
636e3df7cf
commit
450c9754f3
|
@ -521,6 +521,8 @@ under the License.
|
||||||
file="${main.src.test}/org/apache/poi/hwpf/data"/>
|
file="${main.src.test}/org/apache/poi/hwpf/data"/>
|
||||||
<sysproperty key="HPSF.testdata.path"
|
<sysproperty key="HPSF.testdata.path"
|
||||||
file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path"
|
||||||
|
file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain"/>
|
<formatter type="plain"/>
|
||||||
<formatter type="xml"/>
|
<formatter type="xml"/>
|
||||||
|
@ -556,6 +558,8 @@ under the License.
|
||||||
file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
||||||
<sysproperty key="HWPF.testdata.path"
|
<sysproperty key="HWPF.testdata.path"
|
||||||
file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path"
|
||||||
|
file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain" usefile="no"/>
|
<formatter type="plain" usefile="no"/>
|
||||||
<batchtest todir="${main.reports.test}">
|
<batchtest todir="${main.reports.test}">
|
||||||
|
@ -585,6 +589,7 @@ under the License.
|
||||||
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
||||||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
||||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain" usefile="no"/>
|
<formatter type="plain" usefile="no"/>
|
||||||
<formatter type="xml"/>
|
<formatter type="xml"/>
|
||||||
|
@ -601,6 +606,7 @@ under the License.
|
||||||
<classpath refid="test.classpath"/>
|
<classpath refid="test.classpath"/>
|
||||||
<sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
|
<sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
|
||||||
<sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
<sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain" usefile="no"/>
|
<formatter type="plain" usefile="no"/>
|
||||||
<test name="${testcase}"/>
|
<test name="${testcase}"/>
|
||||||
|
@ -639,6 +645,7 @@ under the License.
|
||||||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
||||||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
||||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain"/>
|
<formatter type="plain"/>
|
||||||
<formatter type="xml"/>
|
<formatter type="xml"/>
|
||||||
|
@ -673,6 +680,7 @@ under the License.
|
||||||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
||||||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
||||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||||
|
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain" usefile="no"/>
|
<formatter type="plain" usefile="no"/>
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.0.3-beta1" date="2008-04-??">
|
<release version="3.0.3-beta1" date="2008-04-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded Word document</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
|
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
|
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
|
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.0.3-beta1" date="2008-04-??">
|
<release version="3.0.3-beta1" date="2008-04-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded Word document</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
|
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
|
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
|
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.poi.hpsf.PropertySet;
|
||||||
import org.apache.poi.hpsf.PropertySetFactory;
|
import org.apache.poi.hpsf.PropertySetFactory;
|
||||||
import org.apache.poi.hpsf.SummaryInformation;
|
import org.apache.poi.hpsf.SummaryInformation;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||||
import org.apache.poi.poifs.filesystem.Entry;
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
|
@ -50,6 +51,8 @@ public abstract class POIDocument {
|
||||||
protected DocumentSummaryInformation dsInf;
|
protected DocumentSummaryInformation dsInf;
|
||||||
/** The open POIFS FileSystem that contains our document */
|
/** The open POIFS FileSystem that contains our document */
|
||||||
protected POIFSFileSystem filesystem;
|
protected POIFSFileSystem filesystem;
|
||||||
|
/** The directory that our document lives in */
|
||||||
|
protected DirectoryNode directory;
|
||||||
|
|
||||||
/** For our own logging use */
|
/** For our own logging use */
|
||||||
protected POILogger logger = POILogFactory.getLogger(this.getClass());
|
protected POILogger logger = POILogFactory.getLogger(this.getClass());
|
||||||
|
@ -57,6 +60,15 @@ public abstract class POIDocument {
|
||||||
/* Have the property streams been read yet? (Only done on-demand) */
|
/* Have the property streams been read yet? (Only done on-demand) */
|
||||||
protected boolean initialized = false;
|
protected boolean initialized = false;
|
||||||
|
|
||||||
|
|
||||||
|
protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
|
||||||
|
this.filesystem = fs;
|
||||||
|
this.directory = dir;
|
||||||
|
}
|
||||||
|
protected POIDocument(POIFSFileSystem fs) {
|
||||||
|
this(fs.getRoot(), fs);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch the Document Summary Information of the document
|
* Fetch the Document Summary Information of the document
|
||||||
*/
|
*/
|
||||||
|
@ -110,7 +122,7 @@ public abstract class POIDocument {
|
||||||
DocumentInputStream dis;
|
DocumentInputStream dis;
|
||||||
try {
|
try {
|
||||||
// Find the entry, and get an input stream for it
|
// Find the entry, and get an input stream for it
|
||||||
dis = filesystem.createDocumentInputStream(setName);
|
dis = directory.createDocumentInputStream(setName);
|
||||||
} catch(IOException ie) {
|
} catch(IOException ie) {
|
||||||
// Oh well, doesn't exist
|
// Oh well, doesn't exist
|
||||||
logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie);
|
logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie);
|
||||||
|
|
|
@ -139,6 +139,7 @@ public class HSSFWorkbook extends POIDocument
|
||||||
|
|
||||||
protected HSSFWorkbook( Workbook book )
|
protected HSSFWorkbook( Workbook book )
|
||||||
{
|
{
|
||||||
|
super(null, null);
|
||||||
workbook = book;
|
workbook = book;
|
||||||
sheets = new ArrayList( INITIAL_CAPACITY );
|
sheets = new ArrayList( INITIAL_CAPACITY );
|
||||||
names = new ArrayList( INITIAL_CAPACITY );
|
names = new ArrayList( INITIAL_CAPACITY );
|
||||||
|
@ -164,8 +165,8 @@ public class HSSFWorkbook extends POIDocument
|
||||||
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes)
|
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
|
super(fs);
|
||||||
this.preserveNodes = preserveNodes;
|
this.preserveNodes = preserveNodes;
|
||||||
this.filesystem = fs;
|
|
||||||
|
|
||||||
// If we're not preserving nodes, don't track the
|
// If we're not preserving nodes, don't track the
|
||||||
// POIFS any more
|
// POIFS any more
|
||||||
|
|
|
@ -106,6 +106,31 @@ public class DirectoryNode
|
||||||
return _path;
|
return _path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* open a document in this directory's list of entries
|
||||||
|
*
|
||||||
|
* @param documentName the name of the document to be opened
|
||||||
|
*
|
||||||
|
* @return a newly opened DocumentInputStream
|
||||||
|
*
|
||||||
|
* @exception IOException if the document does not exist or the
|
||||||
|
* name is that of a DirectoryEntry
|
||||||
|
*/
|
||||||
|
|
||||||
|
public DocumentInputStream createDocumentInputStream(
|
||||||
|
final String documentName)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
Entry document = getEntry(documentName);
|
||||||
|
|
||||||
|
if (!document.isDocumentEntry())
|
||||||
|
{
|
||||||
|
throw new IOException("Entry '" + documentName
|
||||||
|
+ "' is not a DocumentEntry");
|
||||||
|
}
|
||||||
|
return new DocumentInputStream(( DocumentEntry ) document);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a new DocumentEntry
|
* create a new DocumentEntry
|
||||||
*
|
*
|
||||||
|
|
|
@ -422,7 +422,7 @@ public class POIFSFileSystem
|
||||||
* @return the root entry
|
* @return the root entry
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public DirectoryEntry getRoot()
|
public DirectoryNode getRoot()
|
||||||
{
|
{
|
||||||
if (_root == null)
|
if (_root == null)
|
||||||
{
|
{
|
||||||
|
@ -446,14 +446,7 @@ public class POIFSFileSystem
|
||||||
final String documentName)
|
final String documentName)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
Entry document = getRoot().getEntry(documentName);
|
return getRoot().createDocumentInputStream(documentName);
|
||||||
|
|
||||||
if (!document.isDocumentEntry())
|
|
||||||
{
|
|
||||||
throw new IOException("Entry '" + documentName
|
|
||||||
+ "' is not a DocumentEntry");
|
|
||||||
}
|
|
||||||
return new DocumentInputStream(( DocumentEntry ) document);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -53,7 +53,7 @@ public class HDGFDiagram extends POIDocument {
|
||||||
private PointerFactory ptrFactory;
|
private PointerFactory ptrFactory;
|
||||||
|
|
||||||
public HDGFDiagram(POIFSFileSystem fs) throws IOException {
|
public HDGFDiagram(POIFSFileSystem fs) throws IOException {
|
||||||
filesystem = fs;
|
super(fs);
|
||||||
|
|
||||||
DocumentEntry docProps =
|
DocumentEntry docProps =
|
||||||
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
|
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");
|
||||||
|
|
|
@ -124,7 +124,7 @@ public class HSLFSlideShow extends POIDocument
|
||||||
*/
|
*/
|
||||||
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
|
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
|
||||||
{
|
{
|
||||||
this.filesystem = filesystem;
|
super(filesystem);
|
||||||
|
|
||||||
// First up, grab the "Current User" stream
|
// First up, grab the "Current User" stream
|
||||||
// We need this before we can detect Encrypted Documents
|
// We need this before we can detect Encrypted Documents
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIDocument;
|
import org.apache.poi.POIDocument;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
|
@ -95,7 +96,7 @@ public class HWPFDocument extends POIDocument
|
||||||
|
|
||||||
protected HWPFDocument()
|
protected HWPFDocument()
|
||||||
{
|
{
|
||||||
|
super(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -141,16 +142,31 @@ public class HWPFDocument extends POIDocument
|
||||||
* in POIFSFileSystem.
|
* in POIFSFileSystem.
|
||||||
*/
|
*/
|
||||||
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
|
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
|
||||||
|
{
|
||||||
|
this(pfilesystem.getRoot(), pfilesystem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This constructor loads a Word document from a specific point
|
||||||
|
* in a POIFSFileSystem, probably not the default.
|
||||||
|
* Typically used to open embedded documents.
|
||||||
|
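* @param directory The directory within the POIFSFileSystem from which the Word document's streams are read.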
|
||||||
|
* @param pfilesystem The POIFSFileSystem that contains the Word document.
|
||||||
|
* @throws IOException If there is an unexpected IOException from the passed
|
||||||
|
* in POIFSFileSystem.
|
||||||
|
*/
|
||||||
|
public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
|
||||||
{
|
{
|
||||||
// Sort out the hpsf properties
|
// Sort out the hpsf properties
|
||||||
filesystem = pfilesystem;
|
super(directory, pfilesystem);
|
||||||
readProperties();
|
readProperties();
|
||||||
|
|
||||||
// read in the main stream.
|
// read in the main stream.
|
||||||
DocumentEntry documentProps =
|
DocumentEntry documentProps = (DocumentEntry)
|
||||||
(DocumentEntry)filesystem.getRoot().getEntry("WordDocument");
|
directory.getEntry("WordDocument");
|
||||||
_mainStream = new byte[documentProps.getSize()];
|
_mainStream = new byte[documentProps.getSize()];
|
||||||
filesystem.createDocumentInputStream("WordDocument").read(_mainStream);
|
|
||||||
|
directory.createDocumentInputStream("WordDocument").read(_mainStream);
|
||||||
|
|
||||||
// use the fib to determine the name of the table stream.
|
// use the fib to determine the name of the table stream.
|
||||||
_fib = new FileInformationBlock(_mainStream);
|
_fib = new FileInformationBlock(_mainStream);
|
||||||
|
@ -165,14 +181,14 @@ public class HWPFDocument extends POIDocument
|
||||||
DocumentEntry tableProps;
|
DocumentEntry tableProps;
|
||||||
try {
|
try {
|
||||||
tableProps =
|
tableProps =
|
||||||
(DocumentEntry)filesystem.getRoot().getEntry(name);
|
(DocumentEntry)directory.getEntry(name);
|
||||||
} catch(FileNotFoundException fnfe) {
|
} catch(FileNotFoundException fnfe) {
|
||||||
throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
|
throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
|
||||||
}
|
}
|
||||||
|
|
||||||
// read in the table stream.
|
// read in the table stream.
|
||||||
_tableStream = new byte[tableProps.getSize()];
|
_tableStream = new byte[tableProps.getSize()];
|
||||||
filesystem.createDocumentInputStream(name).read(_tableStream);
|
directory.createDocumentInputStream(name).read(_tableStream);
|
||||||
|
|
||||||
_fib.fillVariableFields(_mainStream, _tableStream);
|
_fib.fillVariableFields(_mainStream, _tableStream);
|
||||||
|
|
||||||
|
@ -180,7 +196,7 @@ public class HWPFDocument extends POIDocument
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
DocumentEntry dataProps =
|
DocumentEntry dataProps =
|
||||||
(DocumentEntry) filesystem.getRoot().getEntry("Data");
|
(DocumentEntry)directory.getEntry("Data");
|
||||||
_dataStream = new byte[dataProps.getSize()];
|
_dataStream = new byte[dataProps.getSize()];
|
||||||
filesystem.createDocumentInputStream("Data").read(_dataStream);
|
filesystem.createDocumentInputStream("Data").read(_dataStream);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,8 @@ import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.model.TextPiece;
|
import org.apache.poi.hwpf.model.TextPiece;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -54,12 +56,16 @@ public class TestWordExtractor extends TestCase {
|
||||||
private WordExtractor extractor;
|
private WordExtractor extractor;
|
||||||
// Corrupted document - can't do paragraph based stuff
|
// Corrupted document - can't do paragraph based stuff
|
||||||
private WordExtractor extractor2;
|
private WordExtractor extractor2;
|
||||||
|
// A Word doc embedded in an Excel file
|
||||||
|
private String filename3;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
String dirname = System.getProperty("HWPF.testdata.path");
|
String dirname = System.getProperty("HWPF.testdata.path");
|
||||||
|
String pdirname = System.getProperty("POIFS.testdata.path");
|
||||||
|
|
||||||
String filename = dirname + "/test2.doc";
|
String filename = dirname + "/test2.doc";
|
||||||
String filename2 = dirname + "/test.doc";
|
String filename2 = dirname + "/test.doc";
|
||||||
|
filename3 = pdirname + "/excel_with_embeded.xls";
|
||||||
extractor = new WordExtractor(new FileInputStream(filename));
|
extractor = new WordExtractor(new FileInputStream(filename));
|
||||||
extractor2 = new WordExtractor(new FileInputStream(filename2));
|
extractor2 = new WordExtractor(new FileInputStream(filename2));
|
||||||
|
|
||||||
|
@ -101,4 +107,25 @@ public class TestWordExtractor extends TestCase {
|
||||||
String text = extractor.getTextFromPieces();
|
String text = extractor.getTextFromPieces();
|
||||||
assertEquals(p_text1_block, text);
|
assertEquals(p_text1_block, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we can get data from an
|
||||||
|
* embedded Word document
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public void testExtractFromEmbeded() throws Exception {
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename3));
|
||||||
|
DirectoryNode dir = (DirectoryNode)
|
||||||
|
fs.getRoot().getEntry("MBD03F25D8D");
|
||||||
|
// Should have WordDocument and 1Table
|
||||||
|
assertNotNull(dir.getEntry("1Table"));
|
||||||
|
assertNotNull(dir.getEntry("WordDocument"));
|
||||||
|
|
||||||
|
HWPFDocument doc = new HWPFDocument(dir, fs);
|
||||||
|
WordExtractor extractor3 = new WordExtractor(doc);
|
||||||
|
|
||||||
|
assertNotNull(extractor3.getText());
|
||||||
|
assertTrue(extractor3.getText().length() > 20);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue