FileMagic now has patterns with up to 12 bytes (JPG)

Avoid exception if a very short file is encountered
Add more tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1859564 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2019-05-20 14:42:31 +00:00
parent f4a3d3bb89
commit b2130769b6
2 changed files with 108 additions and 4 deletions
src
java/org/apache/poi/poifs/filesystem
testcases/org/apache/poi/poifs/filesystem

View File

@ -19,13 +19,13 @@ package org.apache.poi.poifs.filesystem;
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
@ -98,6 +98,9 @@ public enum FileMagic {
/** UNKNOWN magic */
UNKNOWN(new byte[0]);
// update this if a longer pattern is added
final static int MAX_PATTERN_LENGTH = 12;
final byte[][] magic;
FileMagic(long magic) {
@ -120,6 +123,12 @@ public enum FileMagic {
public static FileMagic valueOf(byte[] magic) {
for (FileMagic fm : values()) {
for (byte[] ma : fm.magic) {
// don't try to match if the given byte-array is too short
// for this pattern anyway
if(magic.length < ma.length) {
continue;
}
if (findMagic(ma, magic)) {
return fm;
}
@ -149,7 +158,13 @@ public enum FileMagic {
*/
public static FileMagic valueOf(final File inp) throws IOException {
try (FileInputStream fis = new FileInputStream(inp)) {
final byte[] data = IOUtils.toByteArray(fis, 8);
// read as many bytes as possible, up to the required number of bytes
byte[] data = new byte[MAX_PATTERN_LENGTH];
int read = IOUtils.readFully(fis, data, 0, MAX_PATTERN_LENGTH);
// only use the bytes that could be read
data = Arrays.copyOf(data, read);
return FileMagic.valueOf(data);
}
}
@ -173,8 +188,8 @@ public enum FileMagic {
throw new IOException("getFileMagic() only operates on streams which support mark(int)");
}
// Grab the first 8 bytes
byte[] data = IOUtils.peekFirst8Bytes(inp);
// Grab the first bytes of this stream
byte[] data = IOUtils.peekFirstNBytes(inp, MAX_PATTERN_LENGTH);
return FileMagic.valueOf(data);
}

View File

@ -22,9 +22,13 @@ import org.apache.poi.POIDataSamples;
import org.junit.Test;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Random;
import static org.junit.Assert.*;
@ -43,6 +47,14 @@ public class TestFileMagic {
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xDB }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE0, 'a', 'b', 'J', 'F', 'I', 'F', 0x00, 0x01 }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xEE }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE1, 'd', 'c', 'E', 'x', 'i', 'f', 0x00, 0x00 }));
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(new byte[0]));
try {
FileMagic.valueOf("some string");
fail("Should catch exception here");
@ -82,4 +94,81 @@ public class TestFileMagic {
assertNotSame(stream, FileMagic.prepareToCheckMagic(stream));
}
}
@Test
public void testMatchingButTooLessData() {
    // The leading bytes agree with the JPG pattern, but the array is too short
    // to hold the pattern; such input used to end in an Exception
    final byte[] truncatedHeader = {
        (byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, 0x00, 0x10, 'J', 'F'
    };
    final FileMagic detected = FileMagic.valueOf(truncatedHeader);
    assertEquals(FileMagic.UNKNOWN, detected);
}
@Test
public void testShortFile() throws IOException {
    // a file holding fewer than 8 bytes used to trigger an exception
    final byte[] fiveBytes = { (byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, 0x00 };
    final File tmp = File.createTempFile("TestFileMagic", ".bin");
    try {
        // write the short content and close the stream before detection runs
        try (FileOutputStream out = new FileOutputStream(tmp)) {
            out.write(fiveBytes);
        }

        final FileMagic detected = FileMagic.valueOf(tmp);
        assertEquals(FileMagic.UNKNOWN, detected);
    } finally {
        // the temporary file must be removable again once the check is done
        final boolean removed = tmp.delete();
        assertTrue(removed);
    }
}
@Test(expected = IOException.class)
public void testMarkRequired() throws IOException {
    final byte[] fiveBytes = { (byte) 0xFF, (byte) 0xD8, (byte) 0xFF, (byte) 0xE0, 0x00 };
    final File tmp = File.createTempFile("TestFileMagic", ".bin");
    try {
        try (FileOutputStream out = new FileOutputStream(tmp)) {
            out.write(fiveBytes);
        }

        // FileInputStream offers no mark support, so handing it to the
        // stream-based check is expected to raise the IOException
        try (FileInputStream unmarkable = new FileInputStream(tmp)) {
            assertFalse(unmarkable.markSupported());
            FileMagic.valueOf(unmarkable);
        }
    } finally {
        // clean up the temporary file regardless of the outcome above
        final boolean removed = tmp.delete();
        assertTrue(removed);
    }
}
@Test
public void testPatterns() {
    // stress the detection with 12-byte buffers that are filled step by step
    // with one repeated byte value, for every possible value
    for (int value = 0; value < 256; value++) {
        final byte[] buffer = new byte[12];
        int filled = 0;
        while (filled < 12) {
            buffer[filled] = (byte) value;
            filled++;
            // checked after every single write, so partly filled buffers are covered too
            assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(buffer));
        }
    }
}
@Test
public void testRandomPatterns() {
    final Random generator = new Random();

    // feed 1000 random 12-byte buffers into the detection
    int round = 0;
    while (round < 1000) {
        final byte[] sample = new byte[12];
        generator.nextBytes(sample);

        // a random buffer may hit a real pattern, so only a non-null result
        // can be required here, not UNKNOWN
        try {
            final FileMagic detected = FileMagic.valueOf(sample);
            assertNotNull(detected);
        } catch (Exception e) {
            // report the offending bytes so that a failure can be reproduced
            throw new IllegalStateException("Failed with pattern " + Arrays.toString(sample), e);
        }
        round++;
    }
}
}