mirror of https://github.com/apache/poi.git
Bug 54332 - WMF extraction failing in Tika for older PowerPoint Files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1687398 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dfca03ec03
commit
1b1d5835b6
|
@ -401,6 +401,7 @@ public final class HSLFSlideShow extends POIDocument {
|
||||||
// Build the PictureData object from the data
|
// Build the PictureData object from the data
|
||||||
try {
|
try {
|
||||||
PictureData pict = PictureData.create(type - 0xF018);
|
PictureData pict = PictureData.create(type - 0xF018);
|
||||||
|
pict.setSignature(signature);
|
||||||
|
|
||||||
// Copy the data, ready to pass to PictureData
|
// Copy the data, ready to pass to PictureData
|
||||||
byte[] imgdata = new byte[imgsize];
|
byte[] imgdata = new byte[imgsize];
|
||||||
|
|
|
@ -32,15 +32,18 @@ public abstract class Bitmap extends PictureData {
|
||||||
|
|
||||||
public byte[] getData(){
|
public byte[] getData(){
|
||||||
byte[] rawdata = getRawData();
|
byte[] rawdata = getRawData();
|
||||||
byte[] imgdata = new byte[rawdata.length-17];
|
int prefixLen = 16*uidInstanceCount+1;
|
||||||
System.arraycopy(rawdata, 17, imgdata, 0, imgdata.length);
|
byte[] imgdata = new byte[rawdata.length-prefixLen];
|
||||||
|
System.arraycopy(rawdata, prefixLen, imgdata, 0, imgdata.length);
|
||||||
return imgdata;
|
return imgdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setData(byte[] data) throws IOException {
|
public void setData(byte[] data) throws IOException {
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
|
for (int i=0; i<uidInstanceCount; i++) {
|
||||||
byte[] checksum = getChecksum(data);
|
byte[] checksum = getChecksum(data);
|
||||||
out.write(checksum);
|
out.write(checksum);
|
||||||
|
}
|
||||||
out.write(0);
|
out.write(0);
|
||||||
out.write(data);
|
out.write(data);
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ public final class DIB extends Bitmap {
|
||||||
/**
|
/**
|
||||||
* Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes
|
* Size of the BITMAPFILEHEADER structure preceding the actual DIB bytes
|
||||||
*/
|
*/
|
||||||
public static final int HEADER_SIZE = 14;
|
private static final int HEADER_SIZE = 14;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return type of this picture
|
* @return type of this picture
|
||||||
|
@ -42,12 +42,28 @@ public final class DIB extends Bitmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DIB signature is <code>0x7A80</code>
|
* DIB signature is {@code 0x7A80} or {@code 0x7A90}
|
||||||
*
|
*
|
||||||
* @return DIB signature (<code>0x7A80</code>)
|
* @return DIB signature ({@code 0x7A80} or {@code 0x7A90})
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature(){
|
||||||
return 0x7A80;
|
return (uidInstanceCount == 1 ? 0x7A80 : 0x7A90);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the DIB signature - either {@code 0x7A80} or {@code 0x7A90}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x7A80:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
break;
|
||||||
|
case 0x7A90:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for DIB");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getData(){
|
public byte[] getData(){
|
||||||
|
|
|
@ -84,11 +84,27 @@ public final class EMF extends Metafile {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* EMF signature is <code>0x3D40</code>
|
* EMF signature is {@code 0x3D40} or {@code 0x3D50}
|
||||||
*
|
*
|
||||||
* @return EMF signature (<code>0x3D40</code>)
|
* @return EMF signature ({@code 0x3D40} or {@code 0x3D50})
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature() {
|
||||||
return 0x3D40;
|
return (uidInstanceCount == 1 ? 0x3D40 : 0x3D50);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the EMF signature - either {@code 0x3D40} or {@code 0x3D50}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x3D40:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
break;
|
||||||
|
case 0x3D50:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for EMF");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,10 @@ import org.apache.poi.hslf.model.Picture;
|
||||||
*/
|
*/
|
||||||
public final class JPEG extends Bitmap {
|
public final class JPEG extends Bitmap {
|
||||||
|
|
||||||
|
public enum ColorSpace { rgb, cymk };
|
||||||
|
|
||||||
|
private ColorSpace colorSpace = ColorSpace.rgb;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return type of this picture
|
* @return type of this picture
|
||||||
* @see org.apache.poi.hslf.model.Picture#JPEG
|
* @see org.apache.poi.hslf.model.Picture#JPEG
|
||||||
|
@ -34,12 +38,48 @@ public final class JPEG extends Bitmap {
|
||||||
return Picture.JPEG;
|
return Picture.JPEG;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ColorSpace getColorSpace() {
|
||||||
|
return colorSpace;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setColorSpace(ColorSpace colorSpace) {
|
||||||
|
this.colorSpace = colorSpace;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* JPEG signature is <code>0x46A0</code>
|
* JPEG signature is one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}
|
||||||
*
|
*
|
||||||
* @return JPEG signature (<code>0x46A0</code>)
|
* @return JPEG signature ({@code 0x46A0, 0x46B0, 0x6E20, 0x6E30})
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature(){
|
||||||
return 0x46A0;
|
return (colorSpace == ColorSpace.rgb)
|
||||||
|
? (uidInstanceCount == 1 ? 0x46A0 : 0x46B0)
|
||||||
|
: (uidInstanceCount == 1 ? 0x6E20 : 0x6E30);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the JPEG signature - one of {@code 0x46A0, 0x46B0, 0x6E20, 0x6E30}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x46A0:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
colorSpace = ColorSpace.rgb;
|
||||||
|
break;
|
||||||
|
case 0x46B0:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
colorSpace = ColorSpace.rgb;
|
||||||
|
break;
|
||||||
|
case 0x6E20:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
colorSpace = ColorSpace.cymk;
|
||||||
|
break;
|
||||||
|
case 0x6E30:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
colorSpace = ColorSpace.cymk;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for JPEG");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,8 +86,8 @@ public abstract class Metafile extends PictureData {
|
||||||
|
|
||||||
zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE;
|
zipsize = LittleEndian.getInt(data, pos); pos += LittleEndian.INT_SIZE;
|
||||||
|
|
||||||
compression = LittleEndian.getUnsignedByte(data, pos); pos++;
|
compression = LittleEndian.getUByte(data, pos); pos++;
|
||||||
filter = LittleEndian.getUnsignedByte(data, pos); pos++;
|
filter = LittleEndian.getUByte(data, pos); pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void write(OutputStream out) throws IOException {
|
public void write(OutputStream out) throws IOException {
|
||||||
|
|
|
@ -33,10 +33,6 @@ import org.apache.poi.hslf.model.Shape;
|
||||||
*/
|
*/
|
||||||
public final class PICT extends Metafile {
|
public final class PICT extends Metafile {
|
||||||
|
|
||||||
public PICT(){
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract compressed PICT data from a ppt
|
* Extract compressed PICT data from a ppt
|
||||||
*/
|
*/
|
||||||
|
@ -46,7 +42,7 @@ public final class PICT extends Metafile {
|
||||||
byte[] macheader = new byte[512];
|
byte[] macheader = new byte[512];
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
out.write(macheader);
|
out.write(macheader);
|
||||||
int pos = CHECKSUM_SIZE;
|
int pos = CHECKSUM_SIZE*uidInstanceCount;
|
||||||
byte[] pict;
|
byte[] pict;
|
||||||
try {
|
try {
|
||||||
pict = read(rawdata, pos);
|
pict = read(rawdata, pos);
|
||||||
|
@ -109,12 +105,27 @@ public final class PICT extends Metafile {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PICT signature is <code>0x5430</code>
|
* PICT signature is {@code 0x5420} or {@code 0x5430}
|
||||||
*
|
*
|
||||||
* @return PICT signature (<code>0x5430</code>)
|
* @return PICT signature ({@code 0x5420} or {@code 0x5430})
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature(){
|
||||||
return 0x5430;
|
return (uidInstanceCount == 1 ? 0x5420 : 0x5430);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the PICT signature - either {@code 0x5420} or {@code 0x5430}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x5420:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
break;
|
||||||
|
case 0x5430:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PICT");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,14 +17,8 @@
|
||||||
|
|
||||||
package org.apache.poi.hslf.blip;
|
package org.apache.poi.hslf.blip;
|
||||||
|
|
||||||
import org.apache.poi.util.PngUtils;
|
|
||||||
import org.apache.poi.hslf.model.Picture;
|
import org.apache.poi.hslf.model.Picture;
|
||||||
import org.apache.poi.hslf.exceptions.HSLFException;
|
import org.apache.poi.util.PngUtils;
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
|
||||||
import java.awt.image.BufferedImage;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents PNG picture data in a PPT file
|
* Represents PNG picture data in a PPT file
|
||||||
|
@ -59,11 +53,27 @@ public final class PNG extends Bitmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PNG signature is <code>0x6E00</code>
|
* PNG signature is {@code 0x6E00} or {@code 0x6E10}
|
||||||
*
|
*
|
||||||
* @return PNG signature (<code>0x6E00</code>)
|
* @return PNG signature ({@code 0x6E00} or {@code 0x6E10})
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature(){
|
||||||
return 0x6E00;
|
return (uidInstanceCount == 1 ? 0x6E00 : 0x6E10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the PNG signature - either {@code 0x6E00} or {@code 0x6E10}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x6E00:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
break;
|
||||||
|
case 0x6E10:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for PNG");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,8 +43,8 @@ public final class WMF extends Metafile {
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
InputStream is = new ByteArrayInputStream( rawdata );
|
InputStream is = new ByteArrayInputStream( rawdata );
|
||||||
Header header = new Header();
|
Header header = new Header();
|
||||||
header.read(rawdata, CHECKSUM_SIZE);
|
header.read(rawdata, CHECKSUM_SIZE*uidInstanceCount);
|
||||||
is.skip(header.getSize() + CHECKSUM_SIZE);
|
is.skip(header.getSize() + CHECKSUM_SIZE*uidInstanceCount);
|
||||||
|
|
||||||
AldusHeader aldus = new AldusHeader();
|
AldusHeader aldus = new AldusHeader();
|
||||||
aldus.left = header.bounds.x;
|
aldus.left = header.bounds.x;
|
||||||
|
@ -84,7 +84,9 @@ public final class WMF extends Metafile {
|
||||||
|
|
||||||
byte[] checksum = getChecksum(data);
|
byte[] checksum = getChecksum(data);
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
|
for (int i=0; i<uidInstanceCount; i++) {
|
||||||
out.write(checksum);
|
out.write(checksum);
|
||||||
|
}
|
||||||
header.write(out);
|
header.write(out);
|
||||||
out.write(compressed);
|
out.write(compressed);
|
||||||
|
|
||||||
|
@ -99,12 +101,27 @@ public final class WMF extends Metafile {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* WMF signature is <code>0x2160</code>
|
* WMF signature is either {@code 0x2160} or {@code 0x2170}
|
||||||
*/
|
*/
|
||||||
public int getSignature(){
|
public int getSignature(){
|
||||||
return 0x2160;
|
return (uidInstanceCount == 1 ? 0x2160 : 0x2170);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the WMF signature - either {@code 0x2160} or {@code 0x2170}
|
||||||
|
*/
|
||||||
|
public void setSignature(int signature) {
|
||||||
|
switch (signature) {
|
||||||
|
case 0x2160:
|
||||||
|
uidInstanceCount = 1;
|
||||||
|
break;
|
||||||
|
case 0x2170:
|
||||||
|
uidInstanceCount = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(signature+" is not a valid instance/signature value for WMF");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Aldus Placeable Metafile header - 22 byte structure before WMF data.
|
* Aldus Placeable Metafile header - 22 byte structure before WMF data.
|
||||||
|
|
|
@ -55,11 +55,17 @@ public abstract class PictureData {
|
||||||
* Binary data of the picture
|
* Binary data of the picture
|
||||||
*/
|
*/
|
||||||
private byte[] rawdata;
|
private byte[] rawdata;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The offset to the picture in the stream
|
* The offset to the picture in the stream
|
||||||
*/
|
*/
|
||||||
protected int offset;
|
protected int offset;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The instance type/signature defines whether one or two UID instances are included
|
||||||
|
*/
|
||||||
|
protected int uidInstanceCount = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns type of this picture.
|
* Returns type of this picture.
|
||||||
* Must be one of the static constants defined in the <code>Picture</code> class.
|
* Must be one of the static constants defined in the <code>Picture</code> class.
|
||||||
|
@ -82,7 +88,16 @@ public abstract class PictureData {
|
||||||
/**
|
/**
|
||||||
* Blip signature.
|
* Blip signature.
|
||||||
*/
|
*/
|
||||||
protected abstract int getSignature();
|
public abstract int getSignature();
|
||||||
|
|
||||||
|
public abstract void setSignature(int signature);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The instance type/signature defines whether one or two UID instances are included
|
||||||
|
*/
|
||||||
|
protected int getUIDInstanceCount() {
|
||||||
|
return uidInstanceCount;
|
||||||
|
}
|
||||||
|
|
||||||
protected static final ImagePainter[] painters = new ImagePainter[8];
|
protected static final ImagePainter[] painters = new ImagePainter[8];
|
||||||
static {
|
static {
|
||||||
|
|
|
@ -131,17 +131,36 @@ public final class TestPicture {
|
||||||
null // EMF
|
null // EMF
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < pictures.length; i++) {
|
int i=0;
|
||||||
BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictures[i].getData()));
|
for (PictureData pd : pictures) {
|
||||||
|
BufferedImage image = ImageIO.read(new ByteArrayInputStream(pd.getData()));
|
||||||
if (pictures[i].getType() != Picture.WMF && pictures[i].getType() != Picture.EMF) {
|
switch (pd.getType()) {
|
||||||
|
case Picture.WMF:
|
||||||
|
case Picture.EMF:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
assertNotNull(image);
|
assertNotNull(image);
|
||||||
|
|
||||||
int[] dimensions = expectedSizes[i];
|
int[] dimensions = expectedSizes[i];
|
||||||
assertEquals(dimensions[0], image.getWidth());
|
assertEquals(dimensions[0], image.getWidth());
|
||||||
assertEquals(dimensions[1], image.getHeight());
|
assertEquals(dimensions[1], image.getHeight());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bug54332() throws Exception {
|
||||||
|
HSLFSlideShow hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332a.ppt")); // TIKA-1046
|
||||||
|
|
||||||
|
PictureData[] pictures = hss.getPictures();
|
||||||
|
assertEquals(1, pictures.length);
|
||||||
|
assertEquals(102352, pictures[0].getData().length);
|
||||||
|
|
||||||
|
hss = new HSLFSlideShow(_slTests.openResourceAsStream("54332b.ppt")); // TIKA-1612
|
||||||
|
pictures = hss.getPictures();
|
||||||
|
assertEquals(1, pictures.length);
|
||||||
|
assertEquals(55830, pictures[0].getData().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue