mirror of
https://github.com/apache/poi.git
synced 2025-02-07 02:28:13 +00:00
Add some more variants of HTML with a preceding newline, which we see frequently
in the large regression test corpus.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1856689 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
097fd7a5e2
commit
0e69c64a62
@ -78,7 +78,17 @@ public enum FileMagic {
|
|||||||
/** PDF document */
|
/** PDF document */
|
||||||
PDF("%PDF"),
|
PDF("%PDF"),
|
||||||
/** Some different HTML documents */
|
/** Some different HTML documents */
|
||||||
HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8), "<HTML".getBytes(UTF_8)),
|
HTML("<!DOCTYP".getBytes(UTF_8),
|
||||||
|
"<html".getBytes(UTF_8),
|
||||||
|
"\n\r<html".getBytes(UTF_8),
|
||||||
|
"\r\n<html".getBytes(UTF_8),
|
||||||
|
"\r<html".getBytes(UTF_8),
|
||||||
|
"\n<html".getBytes(UTF_8),
|
||||||
|
"<HTML".getBytes(UTF_8),
|
||||||
|
"\r\n<HTML".getBytes(UTF_8),
|
||||||
|
"\n\r<HTML".getBytes(UTF_8),
|
||||||
|
"\r<HTML".getBytes(UTF_8),
|
||||||
|
"\n<HTML".getBytes(UTF_8)),
|
||||||
WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
|
WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
|
||||||
// keep UNKNOWN always as last enum!
|
// keep UNKNOWN always as last enum!
|
||||||
/** UNKNOWN magic */
|
/** UNKNOWN magic */
|
||||||
@ -110,11 +120,12 @@ public enum FileMagic {
|
|||||||
return UNKNOWN;
|
return UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean findMagic(byte[] cmp, byte[] actual) {
|
private static boolean findMagic(byte[] expected, byte[] actual) {
|
||||||
int i=0;
|
int i=0;
|
||||||
for (byte m : cmp) {
|
for (byte expectedByte : expected) {
|
||||||
byte d = actual[i++];
|
byte actualByte = actual[i++];
|
||||||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
|
if ((actualByte != expectedByte &&
|
||||||
|
(expectedByte != 0x70 || (actualByte != 0x10 && actualByte != 0x20 && actualByte != 0x40)))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,10 @@ public class TestFileMagic {
|
|||||||
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(Charsets.UTF_8)));
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(Charsets.UTF_8)));
|
||||||
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(Charsets.UTF_8)));
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(Charsets.UTF_8)));
|
||||||
assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(Charsets.UTF_8)));
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(Charsets.UTF_8)));
|
||||||
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n\r<html".getBytes(Charsets.UTF_8)));
|
||||||
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n<html".getBytes(Charsets.UTF_8)));
|
||||||
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
|
||||||
|
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
FileMagic.valueOf("some string");
|
FileMagic.valueOf("some string");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user