Support compressed pictures properly, from bug #41032

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@480585 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2006-11-29 14:49:40 +00:00
parent c6960a5c08
commit 925f724d4c
6 changed files with 144 additions and 23 deletions

View File

@ -25,6 +25,8 @@ import java.io.IOException;
import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
public class FIBFieldHandler public class FIBFieldHandler
{ {
@ -122,6 +124,8 @@ public class FIBFieldHandler
public static final int STTBLISTNAMES = 91; public static final int STTBLISTNAMES = 91;
public static final int STTBFUSSR = 92; public static final int STTBFUSSR = 92;
private static POILogger log = POILogFactory.getLogger(FIBFieldHandler.class);
private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2; private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
private HashMap _unknownMap = new HashMap(); private HashMap _unknownMap = new HashMap();
@ -145,12 +149,21 @@ public class FIBFieldHandler
if (offsetList.contains(new Integer(x)) ^ areKnown) if (offsetList.contains(new Integer(x)) ^ areKnown)
{ {
if (dsSize > 0) if (dsSize > 0)
{
if (dsOffset + dsSize > tableStream.length)
{
log.log(POILogger.WARN, "Unhandled data structure points to outside the buffer. " +
"offset = " + dsOffset + ", length = " + dsSize +
", buffer length = " + tableStream.length);
}
else
{ {
UnhandledDataStructure unhandled = new UnhandledDataStructure( UnhandledDataStructure unhandled = new UnhandledDataStructure(
tableStream, dsOffset, dsSize); tableStream, dsOffset, dsSize);
_unknownMap.put(new Integer(x), unhandled); _unknownMap.put(new Integer(x), unhandled);
} }
} }
}
_fields[x*2] = dsOffset; _fields[x*2] = dsOffset;
_fields[(x*2) + 1] = dsSize; _fields[(x*2) + 1] = dsSize;
} }

View File

@ -23,7 +23,13 @@ public class UnhandledDataStructure
/**
 * Copies a slice of the supplied buffer into this structure's own buffer.
 *
 * @param buf    source bytes to copy from
 * @param offset index in buf of the first byte to copy
 * @param length number of bytes to copy
 * @throws IndexOutOfBoundsException if offset or length is negative, or if
 *         the requested range does not lie entirely inside buf
 */
public UnhandledDataStructure(byte[] buf, int offset, int length)
{
  // Validate before allocating, so a bogus length never triggers a large
  // allocation. The upper bound is tested by subtraction so that
  // offset + length cannot overflow and slip past the check.
  if (offset < 0 || length < 0 || offset > buf.length - length)
  {
    throw new IndexOutOfBoundsException("buffer length is " + buf.length +
      " but code is trying to read " + length + " from offset " + offset);
  }
  _buf = new byte[length];
  System.arraycopy(buf, offset, _buf, 0, length);
}

View File

@ -18,9 +18,14 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.POILogFactory;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.InflaterInputStream;
/** /**
* Represents embedded picture extracted from Word Document * Represents embedded picture extracted from Word Document
@ -28,8 +33,11 @@ import java.io.IOException;
*/ */
public class Picture public class Picture
{ {
private static final POILogger log = POILogFactory.getLogger(Picture.class);
// public static final int FILENAME_OFFSET = 0x7C; // public static final int FILENAME_OFFSET = 0x7C;
// public static final int FILENAME_SIZE_OFFSET = 0x6C; // public static final int FILENAME_SIZE_OFFSET = 0x6C;
static final int MFPMM_OFFSET = 0x6;
static final int BLOCK_TYPE_OFFSET = 0xE; static final int BLOCK_TYPE_OFFSET = 0xE;
static final int PICT_HEADER_OFFSET = 0x4; static final int PICT_HEADER_OFFSET = 0x4;
static final int UNKNOWN_HEADER_SIZE = 0x49; static final int UNKNOWN_HEADER_SIZE = 0x49;
@ -41,13 +49,22 @@ public class Picture
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00}; public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A}; public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 };
public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x
// TODO: DIB, PICT
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'}; public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
private int dataBlockStartOfsset; private int dataBlockStartOfsset;
private int pictureBytesStartOffset; private int pictureBytesStartOffset;
private int dataBlockSize; private int dataBlockSize;
private int size; private int size;
// private String fileName; // private String fileName;
private byte[] rawContent;
private byte[] content; private byte[] content;
private byte[] _dataStream; private byte[] _dataStream;
private int aspectRatioX; private int aspectRatioX;
@ -77,9 +94,12 @@ public class Picture
if (fillBytes) if (fillBytes)
{ {
fillImageContent(_dataStream); fillImageContent();
}
} }
private void fillWidthHeight()
{
String ext = suggestFileExtension(); String ext = suggestFileExtension();
// trying to extract width and height from pictures content: // trying to extract width and height from pictures content:
if ("jpg".equalsIgnoreCase(ext)) { if ("jpg".equalsIgnoreCase(ext)) {
@ -121,8 +141,8 @@ public class Picture
*/ */
public void writeImageContent(OutputStream out) throws IOException public void writeImageContent(OutputStream out) throws IOException
{ {
if (content!=null && content.length>0) { if (rawContent!=null && rawContent.length>0) {
out.write(content, 0, size); out.write(rawContent, 0, size);
} else { } else {
out.write(_dataStream, pictureBytesStartOffset, size); out.write(_dataStream, pictureBytesStartOffset, size);
} }
@ -135,11 +155,20 @@ public class Picture
{ {
if (content == null || content.length<=0) if (content == null || content.length<=0)
{ {
fillImageContent(this._dataStream); fillImageContent();
} }
return content; return content;
} }
/**
 * Returns the picture bytes as they are stored in the data stream, without any
 * decompression. The bytes are copied out of the data stream the first time they
 * are asked for (and again on every call while the cached array is empty).
 *
 * @return the raw picture bytes
 */
public byte[] getRawContent()
{
  boolean alreadyLoaded = rawContent != null && rawContent.length > 0;
  if (!alreadyLoaded)
  {
    fillRawImageContent();
  }
  return rawContent;
}
/** /**
* *
* @return size in bytes of the picture * @return size in bytes of the picture
@ -171,10 +200,12 @@ public class Picture
*/ */
public String suggestFileExtension() public String suggestFileExtension()
{ {
if (content!=null && content.length>0) { String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset);
return suggestFileExtension(content, 0); if ("".equals(extension)) {
// May be compressed. Get the uncompressed content and inspect that.
extension = suggestFileExtension(getContent(), 0);
} }
return suggestFileExtension(_dataStream, pictureBytesStartOffset); return extension;
} }
@ -188,11 +219,16 @@ public class Picture
return "gif"; return "gif";
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) { } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
return "bmp"; return "bmp";
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) { } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
return "tiff"; matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
} else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
return "tiff"; return "tiff";
} else if (matchSignature(content, WMF1, 0) ||
matchSignature(content, WMF2, 0)) {
return "wmf";
} else if (matchSignature(content, EMF, 0)) {
return "emf";
} }
// TODO: DIB, PICT
return ""; return "";
} }
@ -233,10 +269,44 @@ public class Picture
// return fileName.trim(); // return fileName.trim();
// } // }
private void fillImageContent(byte[] dataStream) private void fillRawImageContent()
{ {
this.content = new byte[size]; this.rawContent = new byte[size];
System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size); System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
}
private void fillImageContent()
{
byte[] rawContent = getRawContent();
// HACK: Detect compressed images. In reality there should be some way to determine
// this from the first 32 bytes, but I can't see any similarity between all the
// samples I have obtained, nor any similarity in the data block contents.
if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
{
try
{
InflaterInputStream in = new InflaterInputStream(
new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] buf = new byte[4096];
int readBytes;
while ((readBytes = in.read(buf)) > 0)
{
out.write(buf, 0, readBytes);
}
content = out.toByteArray();
}
catch (IOException e)
{
// Problems reading from the actual ByteArrayInputStream should never happen
// so this will only ever be a ZipException.
log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
}
} else {
// Raw data is not compressed.
content = rawContent;
}
} }
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize) private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
@ -322,18 +392,28 @@ public class Picture
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4); this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
} }
} }
/** /**
* returns pixel width of the picture or -1 if dimensions determining was failed * returns pixel width of the picture or -1 if dimensions determining was failed
*/ */
public int getWidth() public int getWidth()
{ {
if (width == -1)
{
fillWidthHeight();
}
return width; return width;
} }
/** /**
* returns pixel height of the picture or -1 if dimensions determining was failed * returns pixel height of the picture or -1 if dimensions determining was failed
*/ */
public int getHeight() public int getHeight()
{ {
if (height == -1)
{
fillWidthHeight();
}
return height; return height;
} }

View File

@ -31,38 +31,40 @@ import junit.framework.TestCase;
* @author nick * @author nick
*/ */
public class TestHWPFPictures extends TestCase { public class TestHWPFPictures extends TestCase {
private HWPFDocument docA;
private HWPFDocument docB;
private String docAFile; private String docAFile;
private String docBFile; private String docBFile;
private String docCFile;
private String imgAFile; private String imgAFile;
private String imgBFile; private String imgBFile;
private String imgCFile;
protected void setUp() throws Exception { protected void setUp() throws Exception {
String dirname = System.getProperty("HWPF.testdata.path"); String dirname = System.getProperty("HWPF.testdata.path");
docAFile = dirname + "/testPictures.doc"; docAFile = dirname + "/testPictures.doc";
docBFile = dirname + "/two_images.doc"; docBFile = dirname + "/two_images.doc";
docCFile = dirname + "/vector_image.doc";
imgAFile = dirname + "/simple_image.jpg"; imgAFile = dirname + "/simple_image.jpg";
imgBFile = dirname + "/simple_image.png"; imgBFile = dirname + "/simple_image.png";
imgCFile = dirname + "/vector_image.emf";
} }
/** /**
* Test just opening the files * Test just opening the files
*/ */
public void testOpen() throws Exception { public void testOpen() throws Exception {
docA = new HWPFDocument(new FileInputStream(docAFile)); HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
docB = new HWPFDocument(new FileInputStream(docBFile)); HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
} }
/** /**
* Test that we have the right numbers of images in each file * Test that we have the right numbers of images in each file
*/ */
public void testImageCount() throws Exception { public void testImageCount() throws Exception {
docA = new HWPFDocument(new FileInputStream(docAFile)); HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
docB = new HWPFDocument(new FileInputStream(docBFile)); HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
assertNotNull(docA.getPicturesTable()); assertNotNull(docA.getPicturesTable());
assertNotNull(docB.getPicturesTable()); assertNotNull(docB.getPicturesTable());
@ -81,7 +83,7 @@ public class TestHWPFPictures extends TestCase {
* Test that we have the right images in at least one file * Test that we have the right images in at least one file
*/ */
public void testImageData() throws Exception { public void testImageData() throws Exception {
docB = new HWPFDocument(new FileInputStream(docBFile)); HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
PicturesTable picB = docB.getPicturesTable(); PicturesTable picB = docB.getPicturesTable();
List picturesB = picB.getAllPictures(); List picturesB = picB.getAllPictures();
@ -104,6 +106,26 @@ public class TestHWPFPictures extends TestCase {
assertBytesSame(pic2B, pic2.getContent()); assertBytesSame(pic2B, pic2.getContent());
} }
/**
 * Test that compressed image data is correctly returned: the single picture
 * in the vector image document must match the reference EMF file byte for byte.
 */
public void testCompressedImageData() throws Exception {
  // Close the stream explicitly so the test does not leak a file handle.
  // NOTE(review): assumes HWPFDocument has read everything it needs by the
  // time its constructor returns - confirm.
  HWPFDocument docC;
  FileInputStream docStream = new FileInputStream(docCFile);
  try {
    docC = new HWPFDocument(docStream);
  } finally {
    docStream.close();
  }

  PicturesTable picC = docC.getPicturesTable();
  List picturesC = picC.getAllPictures();
  assertEquals(1, picturesC.size());

  Picture pic = (Picture)picturesC.get(0);
  assertNotNull(pic);

  // Check the same
  byte[] expected = readFile(imgCFile);
  byte[] actual = pic.getContent();
  // Fail with an assertion, not a NullPointerException, if no content came back.
  assertNotNull(actual);
  assertEquals(expected.length, actual.length);
  assertBytesSame(expected, actual);
}
private void assertBytesSame(byte[] a, byte[] b) { private void assertBytesSame(byte[] a, byte[] b) {
assertEquals(a.length, b.length); assertEquals(a.length, b.length);