Bug 61267: detect Word v2 files and report that they are not supported in Apache POI

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1828176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2018-04-02 17:15:42 +00:00
parent 56717351c1
commit 7d8da8ea59
6 changed files with 25 additions and 5 deletions

View File

@ -302,6 +302,8 @@ public class TestAllFiles {
"spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
"document/61612a.docx",
"document/word2.doc",
// old Excel files, which we only support simple text extraction of
"spreadsheet/testEXCEL_2.xls",

View File

@ -54,7 +54,8 @@ public class HPSFFileHandler extends POIFSFileHandler {
"spreadsheet/55982.xls",
"spreadsheet/testEXCEL_3.xls",
"spreadsheet/testEXCEL_4.xls",
"hpsf/Test_Humor-Generation.ppt"
"hpsf/Test_Humor-Generation.ppt",
"document/word2.doc"
);
static final Set<String> EXCLUDES_HANDLE_FILE = unmodifiableHashSet(

View File

@ -77,6 +77,7 @@ public enum FileMagic {
PDF("%PDF"),
/** Some different HTML documents */
HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8)),
/** Old Word version 2 files (magic bytes 0xDB 0xA5 0x2D 0x00) */
WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
// keep UNKNOWN always as last enum!
/** UNKNOWN magic */
UNKNOWN(new byte[0]);

View File

@ -136,6 +136,9 @@ public final class HeaderBlock implements HeaderBlockConstants {
case MSWRITE:
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
+ "Apache POI doesn't currently support this format");
case WORD2:
throw new NotOLE2FileException("The supplied data appears to be an old Word version 2 file. "
+ "Apache POI doesn't currently support this format");
case BIFF2:
case BIFF3:
case BIFF4:

View File

@ -60,7 +60,20 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
doc.close();
}
/**
* Test a simple Word 2 document: opening it through
* {@code HWPFTestDataSamples.openSampleFile} is expected to fail, and the
* test passes only if an {@link IllegalArgumentException} is thrown.
*
* @throws IOException declared because the sample-opening helper may throw it
*/
@Test(expected=IllegalArgumentException.class)
public void testWord2hwpf() throws IOException {
// Can't open as HWPFDocument - the call itself must throw, so no result is kept
HWPFTestDataSamples.openSampleFile("word2.doc");
}
/**
* Test that the Word 2 sample is also rejected when opened through
* {@code HWPFTestDataSamples.openOldSampleFile}; the test passes only if a
* {@link RuntimeException} (or a subclass) is thrown.
*
* @throws IOException declared because the sample-opening helper may throw it
*/
@Test(expected=RuntimeException.class)
public void testWord2hwpfOld() throws IOException {
// Attempt to open; this is expected to throw rather than return a document
HWPFTestDataSamples.openOldSampleFile("word2.doc");
}
/**
* Test a simple Word 95 document

Binary file not shown.