Support compressed pictures properly, from bug #41032

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@480585 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2006-11-29 14:49:40 +00:00
parent c6960a5c08
commit 925f724d4c
6 changed files with 144 additions and 23 deletions

View File

@ -25,6 +25,8 @@ import java.io.IOException;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
public class FIBFieldHandler
{
@ -122,6 +124,8 @@ public class FIBFieldHandler
public static final int STTBLISTNAMES = 91;
public static final int STTBFUSSR = 92;
private static POILogger log = POILogFactory.getLogger(FIBFieldHandler.class);
private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
private HashMap _unknownMap = new HashMap();
@ -146,9 +150,18 @@ public class FIBFieldHandler
{
if (dsSize > 0)
{
UnhandledDataStructure unhandled = new UnhandledDataStructure(
tableStream, dsOffset, dsSize);
_unknownMap.put(new Integer(x), unhandled);
if (dsOffset + dsSize > tableStream.length)
{
log.log(POILogger.WARN, "Unhandled data structure points to outside the buffer. " +
"offset = " + dsOffset + ", length = " + dsSize +
", buffer length = " + tableStream.length);
}
else
{
UnhandledDataStructure unhandled = new UnhandledDataStructure(
tableStream, dsOffset, dsSize);
_unknownMap.put(new Integer(x), unhandled);
}
}
}
_fields[x*2] = dsOffset;

View File

@ -23,7 +23,13 @@ public class UnhandledDataStructure
public UnhandledDataStructure(byte[] buf, int offset, int length)
{
// System.out.println("Yes, using my code");
_buf = new byte[length];
if (offset + length > buf.length)
{
throw new IndexOutOfBoundsException("buffer length is " + buf.length +
"but code is trying to read " + length + " from offset " + offset);
}
System.arraycopy(buf, offset, _buf, 0, length);
}

View File

@ -18,9 +18,14 @@
package org.apache.poi.hwpf.usermodel;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.POILogFactory;
import java.io.OutputStream;
import java.io.IOException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.InflaterInputStream;
/**
* Represents embedded picture extracted from Word Document
@ -28,8 +33,11 @@ import java.io.IOException;
*/
public class Picture
{
private static final POILogger log = POILogFactory.getLogger(Picture.class);
// public static final int FILENAME_OFFSET = 0x7C;
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
static final int MFPMM_OFFSET = 0x6;
static final int BLOCK_TYPE_OFFSET = 0xE;
static final int PICT_HEADER_OFFSET = 0x4;
static final int UNKNOWN_HEADER_SIZE = 0x49;
@ -41,13 +49,22 @@ public class Picture
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 };
public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x
// TODO: DIB, PICT
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
private int dataBlockStartOfsset;
private int pictureBytesStartOffset;
private int dataBlockSize;
private int size;
// private String fileName;
private byte[] rawContent;
private byte[] content;
private byte[] _dataStream;
private int aspectRatioX;
@ -77,9 +94,12 @@ public class Picture
if (fillBytes)
{
fillImageContent(_dataStream);
fillImageContent();
}
}
private void fillWidthHeight()
{
String ext = suggestFileExtension();
// trying to extract width and height from pictures content:
if ("jpg".equalsIgnoreCase(ext)) {
@ -121,8 +141,8 @@ public class Picture
*/
public void writeImageContent(OutputStream out) throws IOException
{
if (content!=null && content.length>0) {
out.write(content, 0, size);
if (rawContent!=null && rawContent.length>0) {
out.write(rawContent, 0, size);
} else {
out.write(_dataStream, pictureBytesStartOffset, size);
}
@ -135,11 +155,20 @@ public class Picture
{
if (content == null || content.length<=0)
{
fillImageContent(this._dataStream);
fillImageContent();
}
return content;
}
public byte[] getRawContent()
{
if (rawContent == null || rawContent.length <= 0)
{
fillRawImageContent();
}
return rawContent;
}
/**
*
* @return size in bytes of the picture
@ -171,10 +200,12 @@ public class Picture
*/
public String suggestFileExtension()
{
if (content!=null && content.length>0) {
return suggestFileExtension(content, 0);
String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset);
if ("".equals(extension)) {
// May be compressed. Get the uncompressed content and inspect that.
extension = suggestFileExtension(getContent(), 0);
}
return suggestFileExtension(_dataStream, pictureBytesStartOffset);
return extension;
}
@ -188,11 +219,16 @@ public class Picture
return "gif";
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
return "bmp";
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
return "tiff";
} else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
return "tiff";
} else if (matchSignature(content, WMF1, 0) ||
matchSignature(content, WMF2, 0)) {
return "wmf";
} else if (matchSignature(content, EMF, 0)) {
return "emf";
}
// TODO: DIB, PICT
return "";
}
@ -233,10 +269,44 @@ public class Picture
// return fileName.trim();
// }
private void fillImageContent(byte[] dataStream)
private void fillRawImageContent()
{
this.content = new byte[size];
System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
this.rawContent = new byte[size];
System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
}
private void fillImageContent()
{
byte[] rawContent = getRawContent();
// HACK: Detect compressed images. In reality there should be some way to determine
// this from the first 32 bytes, but I can't see any similarity between all the
// samples I have obtained, nor any similarity in the data block contents.
if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
{
try
{
InflaterInputStream in = new InflaterInputStream(
new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] buf = new byte[4096];
int readBytes;
while ((readBytes = in.read(buf)) > 0)
{
out.write(buf, 0, readBytes);
}
content = out.toByteArray();
}
catch (IOException e)
{
// Problems reading from the actual ByteArrayInputStream should never happen
// so this will only ever be a ZipException.
log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
}
} else {
// Raw data is not compressed.
content = rawContent;
}
}
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
@ -322,18 +392,28 @@ public class Picture
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
}
}
/**
* returns pixel width of the picture or -1 if dimensions determining was failed
*/
public int getWidth()
{
if (width == -1)
{
fillWidthHeight();
}
return width;
}
/**
* returns pixel height of the picture or -1 if dimensions determining was failed
*/
public int getHeight()
{
if (height == -1)
{
fillWidthHeight();
}
return height;
}

View File

@ -31,38 +31,40 @@ import junit.framework.TestCase;
* @author nick
*/
public class TestHWPFPictures extends TestCase {
private HWPFDocument docA;
private HWPFDocument docB;
private String docAFile;
private String docBFile;
private String docCFile;
private String imgAFile;
private String imgBFile;
private String imgCFile;
protected void setUp() throws Exception {
String dirname = System.getProperty("HWPF.testdata.path");
docAFile = dirname + "/testPictures.doc";
docBFile = dirname + "/two_images.doc";
docCFile = dirname + "/vector_image.doc";
imgAFile = dirname + "/simple_image.jpg";
imgBFile = dirname + "/simple_image.png";
imgCFile = dirname + "/vector_image.emf";
}
/**
* Test just opening the files
*/
public void testOpen() throws Exception {
docA = new HWPFDocument(new FileInputStream(docAFile));
docB = new HWPFDocument(new FileInputStream(docBFile));
HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
}
/**
* Test that we have the right numbers of images in each file
*/
public void testImageCount() throws Exception {
docA = new HWPFDocument(new FileInputStream(docAFile));
docB = new HWPFDocument(new FileInputStream(docBFile));
HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
assertNotNull(docA.getPicturesTable());
assertNotNull(docB.getPicturesTable());
@ -81,7 +83,7 @@ public class TestHWPFPictures extends TestCase {
* Test that we have the right images in at least one file
*/
public void testImageData() throws Exception {
docB = new HWPFDocument(new FileInputStream(docBFile));
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
PicturesTable picB = docB.getPicturesTable();
List picturesB = picB.getAllPictures();
@ -104,6 +106,26 @@ public class TestHWPFPictures extends TestCase {
assertBytesSame(pic2B, pic2.getContent());
}
/**
* Test that compressed image data is correctly returned.
*/
public void testCompressedImageData() throws Exception {
HWPFDocument docC = new HWPFDocument(new FileInputStream(docCFile));
PicturesTable picC = docC.getPicturesTable();
List picturesC = picC.getAllPictures();
assertEquals(1, picturesC.size());
Picture pic = (Picture)picturesC.get(0);
assertNotNull(pic);
// Check the same
byte[] picBytes = readFile(imgCFile);
assertEquals(picBytes.length, pic.getContent().length);
assertBytesSame(picBytes, pic.getContent());
}
private void assertBytesSame(byte[] a, byte[] b) {
assertEquals(a.length, b.length);