mirror of https://github.com/apache/poi.git
FileMagic now has patterns with up to 12 bytes (JPG).
Avoid an exception if a very short file is encountered.
Add more tests.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1859564 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f4a3d3bb89
commit
b2130769b6
src
java/org/apache/poi/poifs/filesystem
testcases/org/apache/poi/poifs/filesystem
|
@ -19,13 +19,13 @@ package org.apache.poi.poifs.filesystem;
|
|||
|
||||
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
|
||||
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -98,6 +98,9 @@ public enum FileMagic {
|
|||
/** UNKNOWN magic */
|
||||
UNKNOWN(new byte[0]);
|
||||
|
||||
// update this if a longer pattern is added
|
||||
final static int MAX_PATTERN_LENGTH = 12;
|
||||
|
||||
final byte[][] magic;
|
||||
|
||||
FileMagic(long magic) {
|
||||
|
@ -120,6 +123,12 @@ public enum FileMagic {
|
|||
public static FileMagic valueOf(byte[] magic) {
|
||||
for (FileMagic fm : values()) {
|
||||
for (byte[] ma : fm.magic) {
|
||||
// don't try to match if the given byte-array is too short
|
||||
// for this pattern anyway
|
||||
if(magic.length < ma.length) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (findMagic(ma, magic)) {
|
||||
return fm;
|
||||
}
|
||||
|
@ -149,7 +158,13 @@ public enum FileMagic {
|
|||
*/
|
||||
public static FileMagic valueOf(final File inp) throws IOException {
|
||||
try (FileInputStream fis = new FileInputStream(inp)) {
|
||||
final byte[] data = IOUtils.toByteArray(fis, 8);
|
||||
// read as many bytes as possible, up to the required number of bytes
|
||||
byte[] data = new byte[MAX_PATTERN_LENGTH];
|
||||
int read = IOUtils.readFully(fis, data, 0, MAX_PATTERN_LENGTH);
|
||||
|
||||
// only use the bytes that could be read
|
||||
data = Arrays.copyOf(data, read);
|
||||
|
||||
return FileMagic.valueOf(data);
|
||||
}
|
||||
}
|
||||
|
@ -173,8 +188,8 @@ public enum FileMagic {
|
|||
throw new IOException("getFileMagic() only operates on streams which support mark(int)");
|
||||
}
|
||||
|
||||
// Grab the first 8 bytes
|
||||
byte[] data = IOUtils.peekFirst8Bytes(inp);
|
||||
// Grab the first bytes of this stream
|
||||
byte[] data = IOUtils.peekFirstNBytes(inp, MAX_PATTERN_LENGTH);
|
||||
|
||||
return FileMagic.valueOf(data);
|
||||
}
|
||||
|
|
|
@ -22,9 +22,13 @@ import org.apache.poi.POIDataSamples;
|
|||
import org.junit.Test;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
|
@ -43,6 +47,14 @@ public class TestFileMagic {
|
|||
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
|
||||
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
|
||||
|
||||
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xDB }));
|
||||
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE0, 'a', 'b', 'J', 'F', 'I', 'F', 0x00, 0x01 }));
|
||||
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xEE }));
|
||||
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE1, 'd', 'c', 'E', 'x', 'i', 'f', 0x00, 0x00 }));
|
||||
|
||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(Charsets.UTF_8)));
|
||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(new byte[0]));
|
||||
|
||||
try {
|
||||
FileMagic.valueOf("some string");
|
||||
fail("Should catch exception here");
|
||||
|
@ -82,4 +94,81 @@ public class TestFileMagic {
|
|||
assertNotSame(stream, FileMagic.prepareToCheckMagic(stream));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchingButTooLessData() {
|
||||
// this matches JPG, but is not long enough, previously this caused an Exception
|
||||
byte[] data = new byte[] { -1, -40, -1, -32, 0, 16, 74, 70 };
|
||||
|
||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShortFile() throws IOException {
|
||||
// having a file shorter than 8 bytes previously caused an exception
|
||||
byte[] data = new byte[] { -1, -40, -1, -32, 0 };
|
||||
|
||||
File file = File.createTempFile("TestFileMagic", ".bin");
|
||||
try {
|
||||
try (FileOutputStream fos = new FileOutputStream(file)) {
|
||||
fos.write(data);
|
||||
}
|
||||
|
||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(file));
|
||||
} finally {
|
||||
assertTrue(file.delete());
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expected = IOException.class)
|
||||
public void testMarkRequired() throws IOException {
|
||||
byte[] data = new byte[] { -1, -40, -1, -32, 0 };
|
||||
|
||||
File file = File.createTempFile("TestFileMagic", ".bin");
|
||||
try {
|
||||
try (FileOutputStream fos = new FileOutputStream(file)) {
|
||||
fos.write(data);
|
||||
}
|
||||
|
||||
// a FileInputStream does not support "marking"
|
||||
try (FileInputStream str = new FileInputStream(file)) {
|
||||
assertFalse(str.markSupported());
|
||||
|
||||
FileMagic.valueOf(str);
|
||||
}
|
||||
} finally {
|
||||
assertTrue(file.delete());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPatterns() {
|
||||
// just try to trash the functionality with some byte-patterns
|
||||
for(int i = 0; i < 256;i++) {
|
||||
final byte[] data = new byte[12];
|
||||
for(int j = 0;j < 12; j++) {
|
||||
data[j] = (byte)i;
|
||||
|
||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(data));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRandomPatterns() {
|
||||
Random random = new Random();
|
||||
|
||||
// just try to trash the functionality with some byte-patterns
|
||||
for(int i = 0; i < 1000;i++) {
|
||||
final byte[] data = new byte[12];
|
||||
random.nextBytes(data);
|
||||
|
||||
// we cannot check for UNKNOWN as we might hit valid byte-patterns here as well
|
||||
try {
|
||||
assertNotNull(FileMagic.valueOf(data));
|
||||
} catch (Exception e) {
|
||||
throw new IllegalStateException("Failed with pattern " + Arrays.toString(data), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue