mirror of https://github.com/apache/poi.git
Bug #51686 - fix ConcurrentModificationException in Tika's OfficeParser
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent e11ec7dd93
commit 879fc2dc1c
|
@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument
|
||||||
* @throws IOException If there is an unexpected IOException from the passed
|
* @throws IOException If there is an unexpected IOException from the passed
|
||||||
* in POIFSFileSystem.
|
* in POIFSFileSystem.
|
||||||
*/
|
*/
|
||||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException
|
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
|
||||||
{
|
|
||||||
// Sort out the hpsf properties
|
// Sort out the hpsf properties
|
||||||
super(directory);
|
super(directory);
|
||||||
|
|
||||||
// read in the main stream.
|
// read in the main stream.
|
||||||
DocumentEntry documentProps = (DocumentEntry)
|
DocumentEntry documentProps = (DocumentEntry)
|
||||||
directory.getEntry("WordDocument");
|
directory.getEntry("WordDocument");
|
||||||
_mainStream = new byte[documentProps.getSize()];
|
_mainStream = new byte[documentProps.getSize()];
|
||||||
|
|
||||||
directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream);
|
directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream);
|
||||||
|
|
||||||
// Create our FIB, and check for the doc being encrypted
|
// Create our FIB, and check for the doc being encrypted
|
||||||
_fib = new FileInformationBlock(_mainStream);
|
_fib = new FileInformationBlock(_mainStream);
|
||||||
if(_fib.isFEncrypted()) {
|
if (_fib.isFEncrypted()) {
|
||||||
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
try {
|
||||||
DirectoryEntry objectPoolEntry;
|
DirectoryEntry objectPoolEntry = (DirectoryEntry) directory
|
||||||
try
|
.getEntry(STREAM_OBJECT_POOL);
|
||||||
{
|
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
||||||
objectPoolEntry = (DirectoryEntry) directory
|
} catch (FileNotFoundException exc) {
|
||||||
.getEntry( STREAM_OBJECT_POOL );
|
|
||||||
}
|
|
||||||
catch ( FileNotFoundException exc )
|
|
||||||
{
|
|
||||||
objectPoolEntry = directory
|
|
||||||
.createDirectory( STREAM_OBJECT_POOL );
|
|
||||||
}
|
|
||||||
_objectPool = new ObjectPoolImpl( objectPoolEntry );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the range which covers the whole of the document, but excludes
|
* Returns the range which covers the whole of the document, but excludes
|
||||||
* any headers and footers.
|
* any headers and footers.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the different routes to extracting text
|
* Test the different routes to extracting text
|
||||||
*
|
*
|
||||||
|
@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase {
|
||||||
assertEquals(p_text1_block, extractor.getText());
|
assertEquals(p_text1_block, extractor.getText());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRootEntiesNavigation() throws IOException {
|
||||||
|
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc");
|
||||||
|
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||||
|
|
||||||
|
String text = null;
|
||||||
|
|
||||||
|
for (Entry entry : fs.getRoot()) {
|
||||||
|
if ("WordDocument".equals(entry.getName())) {
|
||||||
|
WordExtractor ex = new WordExtractor(fs);
|
||||||
|
text = ex.getText();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertNotNull(text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue