mirror of https://github.com/apache/poi.git
Initial ExtractorFactory support for building TextExtractors for embedded documents
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@691351 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b83a13bb2a
commit
e4ff06ec79
|
@ -41,6 +41,7 @@
|
||||||
</release>
|
</release>
|
||||||
-->
|
-->
|
||||||
<release version="3.5.1-beta2" date="2008-08-20">
|
<release version="3.5.1-beta2" date="2008-08-20">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">Initial ExtractorFactory support for building TextExtractors for embedded documents</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Support stripping XSSF header and footer fields (eg page number) out of header and footer text if required</action>
|
<action dev="POI-DEVELOPERS" type="add">Support stripping XSSF header and footer fields (eg page number) out of header and footer text if required</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
|
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
</release>
|
</release>
|
||||||
-->
|
-->
|
||||||
<release version="3.5.1-beta2" date="2008-08-20">
|
<release version="3.5.1-beta2" date="2008-08-20">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">Initial ExtractorFactory support for building TextExtractors for embedded documents</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Support stripping XSSF header and footer fields (eg page number) out of header and footer text if required</action>
|
<action dev="POI-DEVELOPERS" type="add">Support stripping XSSF header and footer fields (eg page number) out of header and footer text if required</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
|
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.poi;
|
||||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||||
import org.apache.poi.hpsf.SummaryInformation;
|
import org.apache.poi.hpsf.SummaryInformation;
|
||||||
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
|
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common Parent for OLE2 based Text Extractors
|
* Common Parent for OLE2 based Text Extractors
|
||||||
|
@ -59,4 +60,12 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
|
||||||
public POITextExtractor getMetadataTextExtractor() {
|
public POITextExtractor getMetadataTextExtractor() {
|
||||||
return new HPSFPropertiesExtractor(this);
|
return new HPSFPropertiesExtractor(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the underlying POIFS FileSystem of
|
||||||
|
* this document.
|
||||||
|
*/
|
||||||
|
public POIFSFileSystem getFileSystem() {
|
||||||
|
return document.filesystem;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -48,7 +49,10 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
|
||||||
this.wb = wb;
|
this.wb = wb;
|
||||||
}
|
}
|
||||||
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
|
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
this(new HSSFWorkbook(fs));
|
this(fs.getRoot(), fs);
|
||||||
|
}
|
||||||
|
public ExcelExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HSSFWorkbook(dir, fs, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,11 @@ package org.apache.poi.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.PushbackInputStream;
|
import java.io.PushbackInputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIOLE2TextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
|
@ -31,6 +33,8 @@ import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.Entry;
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.xslf.XSLFSlideShow;
|
import org.apache.poi.xslf.XSLFSlideShow;
|
||||||
|
@ -105,24 +109,95 @@ public class ExtractorFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
|
return createExtractor(fs.getRoot(), fs);
|
||||||
|
}
|
||||||
|
public static POIOLE2TextExtractor createExtractor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
|
||||||
// Look for certain entries in the stream, to figure it
|
// Look for certain entries in the stream, to figure it
|
||||||
// out from
|
// out from
|
||||||
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
|
for(Iterator entries = poifsDir.getEntries(); entries.hasNext(); ) {
|
||||||
Entry entry = (Entry)entries.next();
|
Entry entry = (Entry)entries.next();
|
||||||
|
|
||||||
if(entry.getName().equals("Workbook")) {
|
if(entry.getName().equals("Workbook")) {
|
||||||
return new ExcelExtractor(fs);
|
return new ExcelExtractor(poifsDir, fs);
|
||||||
}
|
}
|
||||||
if(entry.getName().equals("WordDocument")) {
|
if(entry.getName().equals("WordDocument")) {
|
||||||
return new WordExtractor(fs);
|
return new WordExtractor(poifsDir, fs);
|
||||||
}
|
}
|
||||||
if(entry.getName().equals("PowerPoint Document")) {
|
if(entry.getName().equals("PowerPoint Document")) {
|
||||||
return new PowerPointExtractor(fs);
|
return new PowerPointExtractor(poifsDir, fs);
|
||||||
}
|
}
|
||||||
if(entry.getName().equals("VisioDocument")) {
|
if(entry.getName().equals("VisioDocument")) {
|
||||||
return new VisioTextExtractor(fs);
|
return new VisioTextExtractor(poifsDir, fs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
|
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of text extractors, one for each of
|
||||||
|
* the embedded documents in the file (if there are any).
|
||||||
|
* If there are no embedded documents, you'll get back an
|
||||||
|
* empty array. Otherwise, you'll get one open
|
||||||
|
* {@link POITextExtractor} for each embedded file.
|
||||||
|
*/
|
||||||
|
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException {
|
||||||
|
// Find all the embedded directories
|
||||||
|
ArrayList dirs = new ArrayList();
|
||||||
|
POIFSFileSystem fs = ext.getFileSystem();
|
||||||
|
if(fs == null) {
|
||||||
|
throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ext instanceof ExcelExtractor) {
|
||||||
|
// These are in MBD... under the root
|
||||||
|
Iterator it = fs.getRoot().getEntries();
|
||||||
|
while(it.hasNext()) {
|
||||||
|
Entry entry = (Entry)it.next();
|
||||||
|
if(entry.getName().startsWith("MBD")) {
|
||||||
|
dirs.add(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if(ext instanceof WordExtractor) {
|
||||||
|
// These are in ObjectPool -> _... under the root
|
||||||
|
try {
|
||||||
|
DirectoryEntry op = (DirectoryEntry)
|
||||||
|
fs.getRoot().getEntry("ObjectPool");
|
||||||
|
Iterator it = op.getEntries();
|
||||||
|
while(it.hasNext()) {
|
||||||
|
Entry entry = (Entry)it.next();
|
||||||
|
if(entry.getName().startsWith("_")) {
|
||||||
|
dirs.add(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch(FileNotFoundException e) {}
|
||||||
|
} else if(ext instanceof PowerPointExtractor) {
|
||||||
|
// Tricky, not stored directly in poifs
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the extractors
|
||||||
|
if(dirs == null || dirs.size() == 0) {
|
||||||
|
return new POITextExtractor[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
POITextExtractor[] te = new POITextExtractor[dirs.size()];
|
||||||
|
for(int i=0; i<te.length; i++) {
|
||||||
|
te[i] = createExtractor(
|
||||||
|
(DirectoryNode)dirs.get(i), ext.getFileSystem()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return te;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of text extractors, one for each of
|
||||||
|
* the embedded documents in the file (if there are any).
|
||||||
|
* If there are no embedded documents, you'll get back an
|
||||||
|
* empty array. Otherwise, you'll get one open
|
||||||
|
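* {@link POITextExtractor} for each embedded file. (Not yet supported for OOXML documents: this overload currently always throws IllegalStateException.)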
|
||||||
|
*/
|
||||||
|
public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
|
||||||
|
throw new IllegalStateException("Not yet supported");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,8 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
|
@ -42,6 +44,7 @@ public class TestExtractorFactory extends TestCase {
|
||||||
private String word_dir;
|
private String word_dir;
|
||||||
private String powerpoint_dir;
|
private String powerpoint_dir;
|
||||||
private String visio_dir;
|
private String visio_dir;
|
||||||
|
private String poifs_dir;
|
||||||
|
|
||||||
private File txt;
|
private File txt;
|
||||||
|
|
||||||
|
@ -63,6 +66,12 @@ public class TestExtractorFactory extends TestCase {
|
||||||
word_dir = System.getProperty("HWPF.testdata.path");
|
word_dir = System.getProperty("HWPF.testdata.path");
|
||||||
powerpoint_dir = System.getProperty("HSLF.testdata.path");
|
powerpoint_dir = System.getProperty("HSLF.testdata.path");
|
||||||
visio_dir = System.getProperty("HDGF.testdata.path");
|
visio_dir = System.getProperty("HDGF.testdata.path");
|
||||||
|
poifs_dir = System.getProperty("POIFS.testdata.path");
|
||||||
|
assertNotNull(excel_dir);
|
||||||
|
assertNotNull(word_dir);
|
||||||
|
assertNotNull(powerpoint_dir);
|
||||||
|
assertNotNull(visio_dir);
|
||||||
|
assertNotNull(poifs_dir);
|
||||||
|
|
||||||
txt = new File(powerpoint_dir, "SampleShow.txt");
|
txt = new File(powerpoint_dir, "SampleShow.txt");
|
||||||
|
|
||||||
|
@ -300,4 +309,56 @@ public class TestExtractorFactory extends TestCase {
|
||||||
// Good
|
// Good
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test embedded docs text extraction. For now, only
|
||||||
|
* does poifs embedded, but will do ooxml ones
|
||||||
|
* at some point.
|
||||||
|
*/
|
||||||
|
public void testEmbeded() throws Exception {
|
||||||
|
POIOLE2TextExtractor ext;
|
||||||
|
POITextExtractor[] embeds;
|
||||||
|
File f;
|
||||||
|
|
||||||
|
// No embeddings
|
||||||
|
ext = (POIOLE2TextExtractor)
|
||||||
|
ExtractorFactory.createExtractor(xls);
|
||||||
|
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||||
|
assertEquals(0, embeds.length);
|
||||||
|
|
||||||
|
// Excel
|
||||||
|
f = new File(poifs_dir, "excel_with_embeded.xls");
|
||||||
|
ext = (POIOLE2TextExtractor)
|
||||||
|
ExtractorFactory.createExtractor(f);
|
||||||
|
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||||
|
|
||||||
|
assertEquals(6, embeds.length);
|
||||||
|
assertTrue(embeds[0] instanceof PowerPointExtractor);
|
||||||
|
assertTrue(embeds[1] instanceof ExcelExtractor);
|
||||||
|
assertTrue(embeds[2] instanceof ExcelExtractor);
|
||||||
|
assertTrue(embeds[3] instanceof PowerPointExtractor);
|
||||||
|
assertTrue(embeds[4] instanceof WordExtractor);
|
||||||
|
assertTrue(embeds[5] instanceof WordExtractor);
|
||||||
|
for(int i=0; i<embeds.length; i++) {
|
||||||
|
assertTrue(embeds[i].getText().length() > 20);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Word
|
||||||
|
f = new File(poifs_dir, "word_with_embeded.doc");
|
||||||
|
ext = (POIOLE2TextExtractor)
|
||||||
|
ExtractorFactory.createExtractor(f);
|
||||||
|
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||||
|
|
||||||
|
assertEquals(4, embeds.length);
|
||||||
|
assertTrue(embeds[0] instanceof WordExtractor);
|
||||||
|
assertTrue(embeds[1] instanceof ExcelExtractor);
|
||||||
|
assertTrue(embeds[2] instanceof ExcelExtractor);
|
||||||
|
assertTrue(embeds[3] instanceof PowerPointExtractor);
|
||||||
|
for(int i=0; i<embeds.length; i++) {
|
||||||
|
assertTrue(embeds[i].getText().length() > 20);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO - PowerPoint
|
||||||
|
// TODO - Visio
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,6 +64,10 @@ public class ChunkFactory {
|
||||||
private void processChunkParseCommands() throws IOException {
|
private void processChunkParseCommands() throws IOException {
|
||||||
String line;
|
String line;
|
||||||
InputStream cpd = ChunkFactory.class.getResourceAsStream(chunkTableName);
|
InputStream cpd = ChunkFactory.class.getResourceAsStream(chunkTableName);
|
||||||
|
if(cpd == null) {
|
||||||
|
throw new IllegalStateException("Unable to find HDGF chunk definition on the classpath - " + chunkTableName);
|
||||||
|
}
|
||||||
|
|
||||||
BufferedReader inp = new BufferedReader(new InputStreamReader(cpd));
|
BufferedReader inp = new BufferedReader(new InputStreamReader(cpd));
|
||||||
while( (line = inp.readLine()) != null ) {
|
while( (line = inp.readLine()) != null ) {
|
||||||
if(line.startsWith("#")) continue;
|
if(line.startsWith("#")) continue;
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.poi.hdgf.chunks.Chunk.Command;
|
||||||
import org.apache.poi.hdgf.streams.ChunkStream;
|
import org.apache.poi.hdgf.streams.ChunkStream;
|
||||||
import org.apache.poi.hdgf.streams.PointerContainingStream;
|
import org.apache.poi.hdgf.streams.PointerContainingStream;
|
||||||
import org.apache.poi.hdgf.streams.Stream;
|
import org.apache.poi.hdgf.streams.Stream;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -44,7 +45,10 @@ public class VisioTextExtractor extends POIOLE2TextExtractor {
|
||||||
this.hdgf = hdgf;
|
this.hdgf = hdgf;
|
||||||
}
|
}
|
||||||
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
|
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
this(new HDGFDiagram(fs));
|
this(fs.getRoot(), fs);
|
||||||
|
}
|
||||||
|
public VisioTextExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HDGFDiagram(dir, fs));
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
}
|
}
|
||||||
public VisioTextExtractor(InputStream inp) throws IOException {
|
public VisioTextExtractor(InputStream inp) throws IOException {
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.poi.hslf.model.Notes;
|
||||||
import org.apache.poi.hslf.model.Slide;
|
import org.apache.poi.hslf.model.Slide;
|
||||||
import org.apache.poi.hslf.model.TextRun;
|
import org.apache.poi.hslf.model.TextRun;
|
||||||
import org.apache.poi.hslf.usermodel.SlideShow;
|
import org.apache.poi.hslf.usermodel.SlideShow;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -96,6 +97,9 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
||||||
public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
|
public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
this(new HSLFSlideShow(fs));
|
this(new HSLFSlideShow(fs));
|
||||||
}
|
}
|
||||||
|
public PowerPointExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HSLFSlideShow(dir, fs));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a PowerPointExtractor, from a HSLFSlideShow
|
* Creates a PowerPointExtractor, from a HSLFSlideShow
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.poi.hwpf.model.TextPiece;
|
||||||
import org.apache.poi.hwpf.usermodel.HeaderStories;
|
import org.apache.poi.hwpf.usermodel.HeaderStories;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -58,6 +59,10 @@ public class WordExtractor extends POIOLE2TextExtractor {
|
||||||
this(new HWPFDocument(fs));
|
this(new HWPFDocument(fs));
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
}
|
}
|
||||||
|
public WordExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HWPFDocument(dir, fs));
|
||||||
|
this.fs = fs;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new Word Extractor
|
* Create a new Word Extractor
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue