mirror of https://github.com/apache/poi.git
Support compressed pictures properly, from bug #41032
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@480585 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c6960a5c08
commit
925f724d4c
|
@ -25,6 +25,8 @@ import java.io.IOException;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
public class FIBFieldHandler
|
public class FIBFieldHandler
|
||||||
{
|
{
|
||||||
|
@ -122,6 +124,8 @@ public class FIBFieldHandler
|
||||||
public static final int STTBLISTNAMES = 91;
|
public static final int STTBLISTNAMES = 91;
|
||||||
public static final int STTBFUSSR = 92;
|
public static final int STTBFUSSR = 92;
|
||||||
|
|
||||||
|
private static POILogger log = POILogFactory.getLogger(FIBFieldHandler.class);
|
||||||
|
|
||||||
private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
|
private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
|
||||||
|
|
||||||
private HashMap _unknownMap = new HashMap();
|
private HashMap _unknownMap = new HashMap();
|
||||||
|
@ -146,9 +150,18 @@ public class FIBFieldHandler
|
||||||
{
|
{
|
||||||
if (dsSize > 0)
|
if (dsSize > 0)
|
||||||
{
|
{
|
||||||
UnhandledDataStructure unhandled = new UnhandledDataStructure(
|
if (dsOffset + dsSize > tableStream.length)
|
||||||
tableStream, dsOffset, dsSize);
|
{
|
||||||
_unknownMap.put(new Integer(x), unhandled);
|
log.log(POILogger.WARN, "Unhandled data structure points to outside the buffer. " +
|
||||||
|
"offset = " + dsOffset + ", length = " + dsSize +
|
||||||
|
", buffer length = " + tableStream.length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
UnhandledDataStructure unhandled = new UnhandledDataStructure(
|
||||||
|
tableStream, dsOffset, dsSize);
|
||||||
|
_unknownMap.put(new Integer(x), unhandled);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_fields[x*2] = dsOffset;
|
_fields[x*2] = dsOffset;
|
||||||
|
|
|
@ -23,7 +23,13 @@ public class UnhandledDataStructure
|
||||||
|
|
||||||
public UnhandledDataStructure(byte[] buf, int offset, int length)
|
public UnhandledDataStructure(byte[] buf, int offset, int length)
|
||||||
{
|
{
|
||||||
|
// System.out.println("Yes, using my code");
|
||||||
_buf = new byte[length];
|
_buf = new byte[length];
|
||||||
|
if (offset + length > buf.length)
|
||||||
|
{
|
||||||
|
throw new IndexOutOfBoundsException("buffer length is " + buf.length +
|
||||||
|
"but code is trying to read " + length + " from offset " + offset);
|
||||||
|
}
|
||||||
System.arraycopy(buf, offset, _buf, 0, length);
|
System.arraycopy(buf, offset, _buf, 0, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,14 @@
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.util.zip.InflaterInputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents embedded picture extracted from Word Document
|
* Represents embedded picture extracted from Word Document
|
||||||
|
@ -28,8 +33,11 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public class Picture
|
public class Picture
|
||||||
{
|
{
|
||||||
|
private static final POILogger log = POILogFactory.getLogger(Picture.class);
|
||||||
|
|
||||||
// public static final int FILENAME_OFFSET = 0x7C;
|
// public static final int FILENAME_OFFSET = 0x7C;
|
||||||
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||||
|
static final int MFPMM_OFFSET = 0x6;
|
||||||
static final int BLOCK_TYPE_OFFSET = 0xE;
|
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||||
static final int PICT_HEADER_OFFSET = 0x4;
|
static final int PICT_HEADER_OFFSET = 0x4;
|
||||||
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
||||||
|
@ -41,13 +49,22 @@ public class Picture
|
||||||
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
|
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
|
||||||
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
|
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
|
||||||
|
|
||||||
|
public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
|
||||||
|
public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 };
|
||||||
|
public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x
|
||||||
|
// TODO: DIB, PICT
|
||||||
|
|
||||||
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
|
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
|
||||||
|
|
||||||
|
public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
|
||||||
|
public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
|
||||||
|
|
||||||
private int dataBlockStartOfsset;
|
private int dataBlockStartOfsset;
|
||||||
private int pictureBytesStartOffset;
|
private int pictureBytesStartOffset;
|
||||||
private int dataBlockSize;
|
private int dataBlockSize;
|
||||||
private int size;
|
private int size;
|
||||||
// private String fileName;
|
// private String fileName;
|
||||||
|
private byte[] rawContent;
|
||||||
private byte[] content;
|
private byte[] content;
|
||||||
private byte[] _dataStream;
|
private byte[] _dataStream;
|
||||||
private int aspectRatioX;
|
private int aspectRatioX;
|
||||||
|
@ -77,9 +94,12 @@ public class Picture
|
||||||
|
|
||||||
if (fillBytes)
|
if (fillBytes)
|
||||||
{
|
{
|
||||||
fillImageContent(_dataStream);
|
fillImageContent();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillWidthHeight()
|
||||||
|
{
|
||||||
String ext = suggestFileExtension();
|
String ext = suggestFileExtension();
|
||||||
// trying to extract width and height from pictures content:
|
// trying to extract width and height from pictures content:
|
||||||
if ("jpg".equalsIgnoreCase(ext)) {
|
if ("jpg".equalsIgnoreCase(ext)) {
|
||||||
|
@ -121,8 +141,8 @@ public class Picture
|
||||||
*/
|
*/
|
||||||
public void writeImageContent(OutputStream out) throws IOException
|
public void writeImageContent(OutputStream out) throws IOException
|
||||||
{
|
{
|
||||||
if (content!=null && content.length>0) {
|
if (rawContent!=null && rawContent.length>0) {
|
||||||
out.write(content, 0, size);
|
out.write(rawContent, 0, size);
|
||||||
} else {
|
} else {
|
||||||
out.write(_dataStream, pictureBytesStartOffset, size);
|
out.write(_dataStream, pictureBytesStartOffset, size);
|
||||||
}
|
}
|
||||||
|
@ -135,11 +155,20 @@ public class Picture
|
||||||
{
|
{
|
||||||
if (content == null || content.length<=0)
|
if (content == null || content.length<=0)
|
||||||
{
|
{
|
||||||
fillImageContent(this._dataStream);
|
fillImageContent();
|
||||||
}
|
}
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public byte[] getRawContent()
|
||||||
|
{
|
||||||
|
if (rawContent == null || rawContent.length <= 0)
|
||||||
|
{
|
||||||
|
fillRawImageContent();
|
||||||
|
}
|
||||||
|
return rawContent;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @return size in bytes of the picture
|
* @return size in bytes of the picture
|
||||||
|
@ -171,10 +200,12 @@ public class Picture
|
||||||
*/
|
*/
|
||||||
public String suggestFileExtension()
|
public String suggestFileExtension()
|
||||||
{
|
{
|
||||||
if (content!=null && content.length>0) {
|
String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset);
|
||||||
return suggestFileExtension(content, 0);
|
if ("".equals(extension)) {
|
||||||
|
// May be compressed. Get the uncompressed content and inspect that.
|
||||||
|
extension = suggestFileExtension(getContent(), 0);
|
||||||
}
|
}
|
||||||
return suggestFileExtension(_dataStream, pictureBytesStartOffset);
|
return extension;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -188,11 +219,16 @@ public class Picture
|
||||||
return "gif";
|
return "gif";
|
||||||
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
|
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
|
||||||
return "bmp";
|
return "bmp";
|
||||||
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
|
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
|
||||||
return "tiff";
|
matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
|
||||||
} else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
|
|
||||||
return "tiff";
|
return "tiff";
|
||||||
|
} else if (matchSignature(content, WMF1, 0) ||
|
||||||
|
matchSignature(content, WMF2, 0)) {
|
||||||
|
return "wmf";
|
||||||
|
} else if (matchSignature(content, EMF, 0)) {
|
||||||
|
return "emf";
|
||||||
}
|
}
|
||||||
|
// TODO: DIB, PICT
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,10 +269,44 @@ public class Picture
|
||||||
// return fileName.trim();
|
// return fileName.trim();
|
||||||
// }
|
// }
|
||||||
|
|
||||||
private void fillImageContent(byte[] dataStream)
|
private void fillRawImageContent()
|
||||||
{
|
{
|
||||||
this.content = new byte[size];
|
this.rawContent = new byte[size];
|
||||||
System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
|
System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillImageContent()
|
||||||
|
{
|
||||||
|
byte[] rawContent = getRawContent();
|
||||||
|
|
||||||
|
// HACK: Detect compressed images. In reality there should be some way to determine
|
||||||
|
// this from the first 32 bytes, but I can't see any similarity between all the
|
||||||
|
// samples I have obtained, nor any similarity in the data block contents.
|
||||||
|
if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
InflaterInputStream in = new InflaterInputStream(
|
||||||
|
new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
|
||||||
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
|
byte[] buf = new byte[4096];
|
||||||
|
int readBytes;
|
||||||
|
while ((readBytes = in.read(buf)) > 0)
|
||||||
|
{
|
||||||
|
out.write(buf, 0, readBytes);
|
||||||
|
}
|
||||||
|
content = out.toByteArray();
|
||||||
|
}
|
||||||
|
catch (IOException e)
|
||||||
|
{
|
||||||
|
// Problems reading from the actual ByteArrayInputStream should never happen
|
||||||
|
// so this will only ever be a ZipException.
|
||||||
|
log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Raw data is not compressed.
|
||||||
|
content = rawContent;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
||||||
|
@ -322,18 +392,28 @@ public class Picture
|
||||||
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
|
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns pixel width of the picture, or -1 if the dimensions could not be determined
|
* returns pixel width of the picture, or -1 if the dimensions could not be determined
|
||||||
*/
|
*/
|
||||||
public int getWidth()
|
public int getWidth()
|
||||||
{
|
{
|
||||||
|
if (width == -1)
|
||||||
|
{
|
||||||
|
fillWidthHeight();
|
||||||
|
}
|
||||||
return width;
|
return width;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns pixel height of the picture, or -1 if the dimensions could not be determined
|
* returns pixel height of the picture, or -1 if the dimensions could not be determined
|
||||||
*/
|
*/
|
||||||
public int getHeight()
|
public int getHeight()
|
||||||
{
|
{
|
||||||
|
if (height == -1)
|
||||||
|
{
|
||||||
|
fillWidthHeight();
|
||||||
|
}
|
||||||
return height;
|
return height;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,38 +31,40 @@ import junit.framework.TestCase;
|
||||||
* @author nick
|
* @author nick
|
||||||
*/
|
*/
|
||||||
public class TestHWPFPictures extends TestCase {
|
public class TestHWPFPictures extends TestCase {
|
||||||
private HWPFDocument docA;
|
|
||||||
private HWPFDocument docB;
|
|
||||||
private String docAFile;
|
private String docAFile;
|
||||||
private String docBFile;
|
private String docBFile;
|
||||||
|
private String docCFile;
|
||||||
|
|
||||||
private String imgAFile;
|
private String imgAFile;
|
||||||
private String imgBFile;
|
private String imgBFile;
|
||||||
|
private String imgCFile;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
String dirname = System.getProperty("HWPF.testdata.path");
|
String dirname = System.getProperty("HWPF.testdata.path");
|
||||||
|
|
||||||
docAFile = dirname + "/testPictures.doc";
|
docAFile = dirname + "/testPictures.doc";
|
||||||
docBFile = dirname + "/two_images.doc";
|
docBFile = dirname + "/two_images.doc";
|
||||||
|
docCFile = dirname + "/vector_image.doc";
|
||||||
|
|
||||||
imgAFile = dirname + "/simple_image.jpg";
|
imgAFile = dirname + "/simple_image.jpg";
|
||||||
imgBFile = dirname + "/simple_image.png";
|
imgBFile = dirname + "/simple_image.png";
|
||||||
|
imgCFile = dirname + "/vector_image.emf";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test just opening the files
|
* Test just opening the files
|
||||||
*/
|
*/
|
||||||
public void testOpen() throws Exception {
|
public void testOpen() throws Exception {
|
||||||
docA = new HWPFDocument(new FileInputStream(docAFile));
|
HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that we have the right numbers of images in each file
|
* Test that we have the right numbers of images in each file
|
||||||
*/
|
*/
|
||||||
public void testImageCount() throws Exception {
|
public void testImageCount() throws Exception {
|
||||||
docA = new HWPFDocument(new FileInputStream(docAFile));
|
HWPFDocument docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
|
|
||||||
assertNotNull(docA.getPicturesTable());
|
assertNotNull(docA.getPicturesTable());
|
||||||
assertNotNull(docB.getPicturesTable());
|
assertNotNull(docB.getPicturesTable());
|
||||||
|
@ -81,7 +83,7 @@ public class TestHWPFPictures extends TestCase {
|
||||||
* Test that we have the right images in at least one file
|
* Test that we have the right images in at least one file
|
||||||
*/
|
*/
|
||||||
public void testImageData() throws Exception {
|
public void testImageData() throws Exception {
|
||||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
HWPFDocument docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
PicturesTable picB = docB.getPicturesTable();
|
PicturesTable picB = docB.getPicturesTable();
|
||||||
List picturesB = picB.getAllPictures();
|
List picturesB = picB.getAllPictures();
|
||||||
|
|
||||||
|
@ -104,6 +106,26 @@ public class TestHWPFPictures extends TestCase {
|
||||||
assertBytesSame(pic2B, pic2.getContent());
|
assertBytesSame(pic2B, pic2.getContent());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that compressed image data is correctly returned.
|
||||||
|
*/
|
||||||
|
public void testCompressedImageData() throws Exception {
|
||||||
|
HWPFDocument docC = new HWPFDocument(new FileInputStream(docCFile));
|
||||||
|
PicturesTable picC = docC.getPicturesTable();
|
||||||
|
List picturesC = picC.getAllPictures();
|
||||||
|
|
||||||
|
assertEquals(1, picturesC.size());
|
||||||
|
|
||||||
|
Picture pic = (Picture)picturesC.get(0);
|
||||||
|
assertNotNull(pic);
|
||||||
|
|
||||||
|
// Check the same
|
||||||
|
byte[] picBytes = readFile(imgCFile);
|
||||||
|
|
||||||
|
assertEquals(picBytes.length, pic.getContent().length);
|
||||||
|
assertBytesSame(picBytes, pic.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private void assertBytesSame(byte[] a, byte[] b) {
|
private void assertBytesSame(byte[] a, byte[] b) {
|
||||||
assertEquals(a.length, b.length);
|
assertEquals(a.length, b.length);
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue