Add the ability to edit HSLFPictureData contents

Pictures can now be edited by calling HSLFPictureData#setData(byte[]). The byte[] should contain the image data as an image viewer might read it.

To enable this functionality, a tighter coupling between the EscherBSERecords of the slideshow and the HSLFPictureData was required. This ensures that changes in image data size are accurately recorded in the records.

In the course of coupling the records and the HSLFPictureData, various scenarios arose where a mapping of records to pictures was non-trivial. Accordingly, the HSLFSlideShowImpl#matchPicturesAndRecords(...) function was added to perform a more sophisticated matching pass. This function is heavily exercised by org.apache.poi.hslf.usermodel.TestBugs.testFile[5] and PPTX2PNG.render[2], as well as the new TestPictures#testSlideshowWithIncorrectOffsets().

Closes #225

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887017 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Marius Volkhart 2021-02-28 23:16:14 +00:00
parent 77ec895ee0
commit d1c9a07860
20 changed files with 1038 additions and 136 deletions

View File

@ -105,6 +105,10 @@ public interface PictureData {
/** /**
* Sets the binary picture data * Sets the binary picture data
* <p>
* The format of the data must match the format of {@link #getType()}. Failure to match the picture data may result
* in data loss.
*
* @param data picture data * @param data picture data
*/ */
void setData(byte[] data) throws IOException; void setData(byte[] data) throws IOException;

View File

@ -20,13 +20,17 @@ package org.apache.poi.hslf.blip;
import java.awt.Dimension; import java.awt.Dimension;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.usermodel.HSLFPictureData; import org.apache.poi.hslf.usermodel.HSLFPictureData;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
/** /**
@ -35,6 +39,29 @@ import org.apache.poi.util.Units;
*/ */
public abstract class Bitmap extends HSLFPictureData { public abstract class Bitmap extends HSLFPictureData {
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link Bitmap}. This API led to detached {@link Bitmap} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public Bitmap() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
protected Bitmap(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public byte[] getData(){ public byte[] getData(){
byte[] rawdata = getRawData(); byte[] rawdata = getRawData();
@ -43,17 +70,22 @@ public abstract class Bitmap extends HSLFPictureData {
} }
@Override @Override
public void setData(byte[] data) throws IOException { protected byte[] formatImageForSlideshow(byte[] data) {
byte[] checksum = getChecksum(data); byte[] checksum = getChecksum(data);
ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + 1 + data.length];
out.write(checksum); int offset = 0;
if (getUIDInstanceCount() == 2) {
out.write(checksum);
}
out.write(0);
out.write(data);
setRawData(out.toByteArray()); System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
if (getUIDInstanceCount() == 2) {
System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
}
offset++;
System.arraycopy(data, 0, rawData, offset, data.length);
return rawData;
} }
@Override @Override

View File

@ -17,10 +17,13 @@
package org.apache.poi.hslf.blip; package org.apache.poi.hslf.blip;
import java.io.IOException; import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.Removal;
/** /**
* Represents a DIB picture data in a PPT file * Represents a DIB picture data in a PPT file
@ -35,6 +38,29 @@ public final class DIB extends Bitmap {
*/ */
private static final int HEADER_SIZE = 14; private static final int HEADER_SIZE = 14;
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link DIB}. This API led to detached {@link DIB} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public DIB() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public DIB(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public PictureType getType(){ public PictureType getType(){
return PictureType.DIB; return PictureType.DIB;
@ -100,9 +126,9 @@ public final class DIB extends Bitmap {
} }
@Override @Override
public void setData(byte[] data) throws IOException { protected byte[] formatImageForSlideshow(byte[] data) {
//cut off the bitmap file-header //cut off the bitmap file-header
byte[] dib = IOUtils.safelyClone(data, HEADER_SIZE, data.length-HEADER_SIZE, data.length); byte[] dib = IOUtils.safelyClone(data, HEADER_SIZE, data.length-HEADER_SIZE, data.length);
super.setData(dib); return super.formatImageForSlideshow(dib);
} }
} }

View File

@ -24,9 +24,14 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.zip.InflaterInputStream; import java.util.zip.InflaterInputStream;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.sl.image.ImageHeaderEMF;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
/** /**
@ -34,6 +39,29 @@ import org.apache.poi.util.Units;
*/ */
public final class EMF extends Metafile { public final class EMF extends Metafile {
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link EMF}. This API led to detached {@link EMF} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public EMF() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public EMF(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public byte[] getData(){ public byte[] getData(){
try { try {
@ -60,11 +88,11 @@ public final class EMF extends Metafile {
} }
@Override @Override
public void setData(byte[] data) throws IOException { protected byte[] formatImageForSlideshow(byte[] data) {
byte[] compressed = compress(data, 0, data.length); byte[] compressed = compress(data, 0, data.length);
ImageHeaderEMF nHeader = new ImageHeaderEMF(data, 0); ImageHeaderEMF nHeader = new ImageHeaderEMF(data, 0);
Header header = new Header(); Header header = new Header();
header.setWmfSize(data.length); header.setWmfSize(data.length);
header.setBounds(nHeader.getBounds()); header.setBounds(nHeader.getBounds());
@ -73,15 +101,22 @@ public final class EMF extends Metafile {
header.setZipSize(compressed.length); header.setZipSize(compressed.length);
byte[] checksum = getChecksum(data); byte[] checksum = getChecksum(data);
ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length];
out.write(checksum); int offset = 0;
if (getUIDInstanceCount() == 2) {
out.write(checksum);
}
header.write(out);
out.write(compressed);
setRawData(out.toByteArray()); System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
if (getUIDInstanceCount() == 2) {
System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
}
header.write(rawData, offset);
offset += header.getSize();
System.arraycopy(compressed, 0, rawData, offset, compressed.length);
return rawData;
} }
@Override @Override

View File

@ -18,6 +18,12 @@
package org.apache.poi.hslf.blip; package org.apache.poi.hslf.blip;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
/** /**
* Represents a JPEG picture data in a PPT file * Represents a JPEG picture data in a PPT file
*/ */
@ -26,6 +32,29 @@ public final class JPEG extends Bitmap {
public enum ColorSpace { rgb, cymk } public enum ColorSpace { rgb, cymk }
private ColorSpace colorSpace = ColorSpace.rgb; private ColorSpace colorSpace = ColorSpace.rgb;
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link JPEG}. This API led to detached {@link JPEG} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public JPEG() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public JPEG(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public PictureType getType(){ public PictureType getType(){

View File

@ -25,9 +25,15 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.zip.DeflaterOutputStream; import java.util.zip.DeflaterOutputStream;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.usermodel.HSLFPictureData; import org.apache.poi.hslf.usermodel.HSLFPictureData;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianInputStream; import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.LittleEndianOutputStream; import org.apache.poi.util.LittleEndianOutputStream;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
/** /**
@ -36,6 +42,29 @@ import org.apache.poi.util.Units;
*/ */
public abstract class Metafile extends HSLFPictureData { public abstract class Metafile extends HSLFPictureData {
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link Metafile}. This API led to detached {@link Metafile} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public Metafile() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
protected Metafile(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
/** /**
* A structure which represents a 34-byte header preceding the compressed metafile data * A structure which represents a 34-byte header preceding the compressed metafile data
*/ */
@ -117,6 +146,44 @@ public abstract class Metafile extends HSLFPictureData {
leos.writeByte(filter); leos.writeByte(filter);
} }
void write(byte[] destination, int offset) {
//hmf
LittleEndian.putInt(destination, offset, wmfsize);
offset += 4;
//left
LittleEndian.putInt(destination, offset, bounds.x);
offset += 4;
//top
LittleEndian.putInt(destination, offset, bounds.y);
offset += 4;
//right
LittleEndian.putInt(destination, offset, bounds.x + bounds.width);
offset += 4;
//bottom
LittleEndian.putInt(destination, offset, bounds.y + bounds.height);
offset += 4;
//inch
LittleEndian.putInt(destination, offset, size.width);
offset += 4;
//inch
LittleEndian.putInt(destination, offset, size.height);
offset += 4;
LittleEndian.putInt(destination, offset, zipsize);
offset += 4;
destination[offset] = (byte) compression;
offset++;
destination[offset] = (byte) filter;
}
public int getSize(){ public int getSize(){
return 34; return 34;
} }
@ -146,11 +213,16 @@ public abstract class Metafile extends HSLFPictureData {
} }
} }
protected static byte[] compress(byte[] bytes, int offset, int length) throws IOException { protected static byte[] compress(byte[] bytes, int offset, int length) {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
DeflaterOutputStream deflater = new DeflaterOutputStream( out ); try (DeflaterOutputStream deflater = new DeflaterOutputStream(out)) {
deflater.write(bytes, offset, length); deflater.write(bytes, offset, length);
deflater.close(); } catch (IOException e) {
// IOException won't get thrown by the DeflaterOutputStream in this configuration because:
// 1. ByteArrayOutputStream doesn't throw an IOException during writes.
// 2. The DeflaterOutputStream is not finished until we're done writing.
throw new AssertionError("Won't happen", e);
}
return out.toByteArray(); return out.toByteArray();
} }

View File

@ -26,9 +26,14 @@ import java.util.zip.InflaterInputStream;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.sl.image.ImageHeaderPICT;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
import static org.apache.logging.log4j.util.Unbox.box; import static org.apache.logging.log4j.util.Unbox.box;
@ -39,6 +44,28 @@ import static org.apache.logging.log4j.util.Unbox.box;
public final class PICT extends Metafile { public final class PICT extends Metafile {
private static final Logger LOG = LogManager.getLogger(PICT.class); private static final Logger LOG = LogManager.getLogger(PICT.class);
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link PICT}. This API led to detached {@link PICT} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public PICT() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public PICT(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public byte[] getData(){ public byte[] getData(){
@ -93,7 +120,7 @@ public final class PICT extends Metafile {
} }
@Override @Override
public void setData(byte[] data) throws IOException { protected byte[] formatImageForSlideshow(byte[] data) {
// skip the first 512 bytes - they are MAC specific crap // skip the first 512 bytes - they are MAC specific crap
final int nOffset = ImageHeaderPICT.PICT_HEADER_OFFSET; final int nOffset = ImageHeaderPICT.PICT_HEADER_OFFSET;
ImageHeaderPICT nHeader = new ImageHeaderPICT(data, nOffset); ImageHeaderPICT nHeader = new ImageHeaderPICT(data, nOffset);
@ -108,15 +135,22 @@ public final class PICT extends Metafile {
header.setDimension(new Dimension(Units.toEMU(nDim.getWidth()), Units.toEMU(nDim.getHeight()))); header.setDimension(new Dimension(Units.toEMU(nDim.getWidth()), Units.toEMU(nDim.getHeight())));
byte[] checksum = getChecksum(data); byte[] checksum = getChecksum(data);
ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length];
out.write(checksum); int offset = 0;
if (getUIDInstanceCount() == 2) {
out.write(checksum);
}
header.write(out);
out.write(compressed);
setRawData(out.toByteArray()); System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
if (getUIDInstanceCount() == 2) {
System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
}
header.write(rawData, offset);
offset += header.getSize();
System.arraycopy(compressed, 0, rawData, offset, compressed.length);
return rawData;
} }
@Override @Override

View File

@ -17,13 +17,41 @@
package org.apache.poi.hslf.blip; package org.apache.poi.hslf.blip;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.sl.image.ImageHeaderPNG; import org.apache.poi.sl.image.ImageHeaderPNG;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
/** /**
* Represents a PNG picture data in a PPT file * Represents a PNG picture data in a PPT file
*/ */
public final class PNG extends Bitmap { public final class PNG extends Bitmap {
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link PNG}. This API led to detached {@link PNG} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public PNG() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public PNG(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public byte[] getData() { public byte[] getData() {
return new ImageHeaderPNG(super.getData()).extractPNG(); return new ImageHeaderPNG(super.getData()).extractPNG();

View File

@ -24,9 +24,14 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.zip.InflaterInputStream; import java.util.zip.InflaterInputStream;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.sl.image.ImageHeaderWMF; import org.apache.poi.sl.image.ImageHeaderWMF;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
/** /**
@ -34,6 +39,29 @@ import org.apache.poi.util.Units;
*/ */
public final class WMF extends Metafile { public final class WMF extends Metafile {
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link WMF}. This API led to detached {@link WMF} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public WMF() {
this(new EscherContainerRecord(), new EscherBSERecord());
}
/**
* Creates a new instance.
*
* @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
* linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
public WMF(EscherContainerRecord recordContainer, EscherBSERecord bse) {
super(recordContainer, bse);
}
@Override @Override
public byte[] getData(){ public byte[] getData(){
try { try {
@ -64,7 +92,7 @@ public final class WMF extends Metafile {
} }
@Override @Override
public void setData(byte[] data) throws IOException { protected byte[] formatImageForSlideshow(byte[] data) {
int pos = 0; int pos = 0;
ImageHeaderWMF nHeader = new ImageHeaderWMF(data, pos); ImageHeaderWMF nHeader = new ImageHeaderWMF(data, pos);
pos += nHeader.getLength(); pos += nHeader.getLength();
@ -79,15 +107,22 @@ public final class WMF extends Metafile {
header.setZipSize(compressed.length); header.setZipSize(compressed.length);
byte[] checksum = getChecksum(data); byte[] checksum = getChecksum(data);
ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length];
out.write(checksum); int offset = 0;
if (getUIDInstanceCount() == 2) {
out.write(checksum);
}
header.write(out);
out.write(compressed);
setRawData(out.toByteArray()); System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
if (getUIDInstanceCount() == 2) {
System.arraycopy(checksum, 0, rawData, offset, checksum.length);
offset += checksum.length;
}
header.write(rawData, offset);
offset += header.getSize();
System.arraycopy(compressed, 0, rawData, offset, compressed.length);
return rawData;
} }
@Override @Override

View File

@ -173,7 +173,6 @@ public final class PPTXMLDump {
return; return;
} }
byte[] pictdata = IOUtils.safelyClone(data, pos + PICT_HEADER_SIZE, size, MAX_RECORD_LENGTH);
pos += PICT_HEADER_SIZE + size; pos += PICT_HEADER_SIZE + size;
padding++; padding++;
@ -183,7 +182,7 @@ public final class PPTXMLDump {
dump(out, header, 0, header.length, padding, true); dump(out, header, 0, header.length, padding, true);
write(out, "</header>" + CR, padding); write(out, "</header>" + CR, padding);
write(out, "<imgdata>" + CR, padding); write(out, "<imgdata>" + CR, padding);
dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true); dump(out, data, 0, Math.min(size, 100), padding, true);
write(out, "</imgdata>" + CR, padding); write(out, "</imgdata>" + CR, padding);
padding--; padding--;
write(out, "</picture>" + CR, padding); write(out, "</picture>" + CR, padding);

View File

@ -570,7 +570,9 @@ public final class HSLFFill {
} else { } else {
EscherBSERecord bse = (EscherBSERecord)lst.get(idx - 1); EscherBSERecord bse = (EscherBSERecord)lst.get(idx - 1);
for (HSLFPictureData pd : pict) { for (HSLFPictureData pd : pict) {
if (pd.getOffset() == bse.getOffset()){
// Reference equals is safe because these BSE belong to the same slideshow
if (pd.bse == bse) {
return pd; return pd;
} }
} }

View File

@ -23,11 +23,18 @@ import java.io.OutputStream;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier; import java.util.function.Supplier;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.common.usermodel.GenericRecord; import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherRecordTypes; import org.apache.poi.ddf.EscherRecordTypes;
import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.DIB;
import org.apache.poi.hslf.blip.EMF; import org.apache.poi.hslf.blip.EMF;
@ -38,8 +45,10 @@ import org.apache.poi.hslf.blip.WMF;
import org.apache.poi.poifs.crypt.CryptoFunctions; import org.apache.poi.poifs.crypt.CryptoFunctions;
import org.apache.poi.poifs.crypt.HashAlgorithm; import org.apache.poi.poifs.crypt.HashAlgorithm;
import org.apache.poi.sl.usermodel.PictureData; import org.apache.poi.sl.usermodel.PictureData;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts; import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.Removal;
import org.apache.poi.util.Units; import org.apache.poi.util.Units;
/** /**
@ -47,19 +56,37 @@ import org.apache.poi.util.Units;
*/ */
public abstract class HSLFPictureData implements PictureData, GenericRecord { public abstract class HSLFPictureData implements PictureData, GenericRecord {
private static final Logger LOGGER = LogManager.getLogger(HSLFPictureData.class);
/** /**
* Size of the image checksum calculated using MD5 algorithm. * Size of the image checksum calculated using MD5 algorithm.
*/ */
protected static final int CHECKSUM_SIZE = 16; protected static final int CHECKSUM_SIZE = 16;
/** /**
* Binary data of the picture * Size of the image preamble in bytes.
*/ * <p>
private byte[] rawdata; * The preamble describes how the image should be decoded. All image types have the same preamble format. The
/** * preamble has little endian encoding. Below is a diagram of the preamble contents.
* The offset to the picture in the stream *
* <pre>
* 0 1 2 3
* 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Signature | Picture Type |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Formatted Length |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* </pre>
*/ */
private int offset; static final int PREAMBLE_SIZE = 8;
/**
* Binary data of the picture, formatted as it will be stored in the {@link HSLFSlideShow}.
* <p>
* This does not include the {@link #PREAMBLE_SIZE preamble}.
*/
private byte[] formattedData;
/** /**
* The instance type/signatures defines if one or two UID instances will be included * The instance type/signatures defines if one or two UID instances will be included
@ -71,6 +98,43 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
*/ */
private int index = -1; private int index = -1;
/**
* {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached to the
* slideshow that this picture is linked to.
*/
final EscherContainerRecord bStore;
/**
* Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
final EscherBSERecord bse;
/**
* @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/
@Deprecated
@Removal(version = "5.3")
public HSLFPictureData() {
this(new EscherContainerRecord(), new EscherBSERecord());
LOGGER.atWarn().log("The no-arg constructor is deprecated. Some functionality such as updating pictures won't " +
"work.");
}
/**
* Creates a new instance.
*
* @param bStore {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached
* to the slideshow that this picture is linked to.
* @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
*/
@Internal
protected HSLFPictureData(EscherContainerRecord bStore, EscherBSERecord bse) {
this.bStore = Objects.requireNonNull(bStore);
this.bse = Objects.requireNonNull(bse);
}
/** /**
* Blip signature. * Blip signature.
*/ */
@ -95,17 +159,34 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
} }
/** /**
* Returns the raw binary data of this Picture excluding the first 8 bytes * Returns the formatted, binary data of this picture excluding the {@link #PREAMBLE_SIZE preamble} bytes.
* which hold image signature and size of the image data. * <p>
* Primarily intended for internal POI use. Use {@link #getData()} to retrieve the picture represented by this
* object.
* *
* @return picture data * @return Picture data formatted for the HSLF format.
* @see #getData()
* @see #formatImageForSlideshow(byte[])
*/ */
public byte[] getRawData(){ public byte[] getRawData(){
return rawdata; return formattedData;
} }
/**
* Sets the formatted data for this picture.
* <p>
* Primarily intended for internal POI use. Use {@link #setData(byte[])} to change the picture represented by this
* object.
*
* @param data Picture data formatted for the HSLF format. Excludes the {@link #PREAMBLE_SIZE preamble}.
* @see #setData(byte[])
* @see #formatImageForSlideshow(byte[])
* @deprecated Set image data using {@link #setData(byte[])}.
*/
@Deprecated
@Removal(version = "5.3")
public void setRawData(byte[] data){ public void setRawData(byte[] data){
rawdata = (data == null) ? null : data.clone(); formattedData = (data == null) ? null : data.clone();
} }
/** /**
@ -114,7 +195,7 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
* @return offset in the 'Pictures' stream * @return offset in the 'Pictures' stream
*/ */
public int getOffset(){ public int getOffset(){
return offset; return bse.getOffset();
} }
/** /**
@ -122,21 +203,25 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
* We need to set it when a new picture is created. * We need to set it when a new picture is created.
* *
* @param offset in the 'Pictures' stream * @param offset in the 'Pictures' stream
* @deprecated This function was only intended for POI internal use. If you have a use case you're concerned about,
* please open an issue in the POI issue tracker.
*/ */
@Deprecated
@Removal(version = "5.3")
public void setOffset(int offset){ public void setOffset(int offset){
this.offset = offset; LOGGER.atWarn().log("HSLFPictureData#setOffset is deprecated.");
} }
/** /**
* Returns 16-byte checksum of this picture * Returns 16-byte checksum of this picture
*/ */
public byte[] getUID(){ public byte[] getUID(){
return Arrays.copyOf(rawdata, 16); return Arrays.copyOf(formattedData, CHECKSUM_SIZE);
} }
@Override @Override
public byte[] getChecksum() { public byte[] getChecksum() {
return getChecksum(getData()); return getUID();
} }
/** /**
@ -173,25 +258,105 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
} }
/** /**
* Create an instance of <code>PictureData</code> by type. * Create an instance of {@link HSLFPictureData} by type.
* *
* @param type type of the picture data. * @param type type of picture.
* Must be one of the static constants defined in the <code>Picture<code> class. * @return concrete instance of {@link HSLFPictureData}.
* @return concrete instance of <code>PictureData</code> * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of its overloads to create new
* {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla
* 46122) and prevented adding additional functionality.
*/ */
@Deprecated
@Removal(version = "5.3")
public static HSLFPictureData create(PictureType type){ public static HSLFPictureData create(PictureType type){
HSLFPictureData pict; LOGGER.atWarn().log("HSLFPictureData#create(PictureType) is deprecated. Some functionality such " +
switch (type){ "as updating pictures won't work.");
case EMF: pict = new EMF(); break;
case WMF: pict = new WMF(); break; // This record code is a stub. It exists only for API compatibility.
case PICT: pict = new PICT(); break; EscherContainerRecord record = new EscherContainerRecord();
case JPEG: pict = new JPEG(); break; EscherBSERecord bse = new EscherBSERecord();
case PNG: pict = new PNG(); break; return new HSLFSlideShowImpl.PictureFactory(record, type, new byte[0], 0, 0)
case DIB: pict = new DIB(); break; .setRecord(bse)
.build();
}
/**
 * Builds a picture of the requested type from bytes that are already in the slideshow's storage format.
 * <p>
 * Meant for parsing an existing slideshow: the bytes taken from the pictures stream need no conversion, so the
 * given array is stored as-is (it is not copied) and the signature is applied verbatim.
 *
 * @param type Image type; selects the concrete picture class.
 * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
 *                        linked to.
 * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
 * @param data Image data formatted for storage in the slideshow, without the {@link #PREAMBLE_SIZE preamble}.
 * @param signature Image format-specific signature. See subclasses for signature details.
 * @return New instance.
 * @throws IllegalArgumentException if {@code type} is not a supported picture type.
 *
 * @see #createFromImageData(PictureType, EscherContainerRecord, EscherBSERecord, byte[])
 */
static HSLFPictureData createFromSlideshowData(
        PictureType type,
        EscherContainerRecord recordContainer,
        EscherBSERecord bse,
        byte[] data,
        int signature
) {
    HSLFPictureData picture = newInstance(type, recordContainer, bse);
    picture.setSignature(signature);
    // Already in storage format: keep the bytes untouched.
    picture.formattedData = data;
    return picture;
}
/**
 * Builds a picture of the requested type from original image bytes that have not yet been formatted for storage
 * inside the slideshow.
 * <p>
 * Meant for adding new pictures to a slideshow: the user-supplied bytes are converted with
 * {@link #formatImageForSlideshow(byte[])} before being stored on the new instance.
 *
 * @param type Image type; selects the concrete picture class.
 * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is
 *                        linked to.
 * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to.
 *            This method does not modify it.
 * @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to
 *             render the image.
 * @return New instance.
 * @throws IllegalArgumentException if {@code type} is not a supported picture type.
 *
 * @see #createFromSlideshowData(PictureType, EscherContainerRecord, EscherBSERecord, byte[], int)
 * @see #setData(byte[])
 */
static HSLFPictureData createFromImageData(
        PictureType type,
        EscherContainerRecord recordContainer,
        EscherBSERecord bse,
        byte[] data
) {
    HSLFPictureData picture = newInstance(type, recordContainer, bse);
    // Raw image bytes must be converted to the type-specific storage representation first.
    byte[] storageForm = picture.formatImageForSlideshow(data);
    picture.formattedData = storageForm;
    return picture;
}
private static HSLFPictureData newInstance(
PictureType type,
EscherContainerRecord recordContainer,
EscherBSERecord bse
) {
switch (type) {
case EMF:
return new EMF(recordContainer, bse);
case WMF:
return new WMF(recordContainer, bse);
case PICT:
return new PICT(recordContainer, bse);
case JPEG:
return new JPEG(recordContainer, bse);
case PNG:
return new PNG(recordContainer, bse);
case DIB:
return new DIB(recordContainer, bse);
default: default:
throw new IllegalArgumentException("Unsupported picture type: " + type); throw new IllegalArgumentException("Unsupported picture type: " + type);
} }
return pict;
} }
/** /**
@ -204,14 +369,15 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
* @return the 24 byte header which preceeds the actual picture data. * @return the 24 byte header which preceeds the actual picture data.
*/ */
public byte[] getHeader() { public byte[] getHeader() {
byte[] header = new byte[16 + 8]; byte[] header = new byte[CHECKSUM_SIZE + PREAMBLE_SIZE];
LittleEndian.putInt(header, 0, getSignature()); LittleEndian.putInt(header, 0, getSignature());
LittleEndian.putInt(header, 4, getRawData().length); LittleEndian.putInt(header, 4, getRawData().length);
System.arraycopy(rawdata, 0, header, 8, 16); System.arraycopy(formattedData, 0, header, PREAMBLE_SIZE, CHECKSUM_SIZE);
return header; return header;
} }
/** /**
* Returns the 1-based index of this picture.
* @return the 1-based index of this picture within the pictures stream
*/ */
public int getIndex() { public int getIndex() {
@ -225,6 +391,71 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord {
this.index = index; this.index = index;
} }
/**
* Formats the picture data for storage in the slideshow.
* <p>
* Images stored in {@link HSLFSlideShow}s are represented differently than when they are standalone files. The
* exact formatting differs for each image type.
* <p>
* The returned array becomes this picture's formatted data when called from {@link #setData(byte[])} or when a
* picture is created from original image bytes.
*
* @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to
* render the image.
* @return Formatted image representation.
*/
protected abstract byte[] formatImageForSlideshow(byte[] data);
/**
 * Reports how many bytes this picture occupies in the image stream of the {@link HSLFSlideShow}: the
 * {@link #PREAMBLE_SIZE preamble} followed by the formatted picture data.
 *
 * @return Stored size of this picture, preamble included.
 */
int getBseSize() {
    final int payloadLength = formattedData.length;
    return PREAMBLE_SIZE + payloadLength;
}
@Override
public final void setData(byte[] data) throws IOException {
    /*
     * Two structures describe the pictures of a slideshow: the HSLFPictureData objects (this instance is one of
     * them), and the Blip Store, whose BSE records act as pointers (offset, size, UID) into the pictures stream.
     *
     * Replacing the image therefore means updating this object, updating the BSE that points at it, and moving
     * the offset of every BSE located after it, because the stored byte count of this picture may have changed.
     */
    final int previousSize = getBseSize();
    formattedData = formatImageForSlideshow(data);
    final int currentSize = getBseSize();
    final int delta = currentSize - previousSize;
    final byte[] uid = getUID();

    // Order the BSE records by offset so that "everything after this picture" is a contiguous tail.
    @SuppressWarnings("unchecked") // The BStore only contains BSE records
    List<EscherBSERecord> ordered = (List<EscherBSERecord>) (Object) bStore.getChildRecords();
    ordered.sort(Comparator.comparingInt(EscherBSERecord::getOffset));

    // Locate our own BSE. Reference equality is safe because these BSE belong to the same slideshow.
    int ownPosition = -1;
    for (int i = 0; i < ordered.size(); i++) {
        if (ordered.get(i) == this.bse) {
            ownPosition = i;
            break;
        }
    }
    if (ownPosition < 0) {
        // No BSE in the store refers to this picture, so there is nothing to update.
        return;
    }

    // Refresh the pointer to this picture: new UID and new byte count.
    this.bse.setUid(uid);
    this.bse.setSize(currentSize);

    // Every record stored behind this picture moves by the change in size.
    for (EscherBSERecord later : ordered.subList(ownPosition + 1, ordered.size())) {
        later.setOffset(later.getOffset() + delta);
    }
}
@Override @Override
public final String getContentType() { public final String getContentType() {
return getType().contentType; return getType().contentType;

View File

@ -125,7 +125,9 @@ public class HSLFPictureShape extends HSLFSimpleShape implements PictureShape<HS
LOG.atError().log("no reference to picture data found "); LOG.atError().log("no reference to picture data found ");
} else { } else {
for (HSLFPictureData pd : pict) { for (HSLFPictureData pd : pict) {
if (pd.getOffset() == bse.getOffset()){
// Reference equals is safe because these BSE belong to the same slideshow
if (pd.bse == bse) {
return pd; return pd;
} }
} }

View File

@ -794,36 +794,11 @@ public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFSh
dggContainer.addChildBefore(bstore, EscherOptRecord.RECORD_ID); dggContainer.addChildBefore(bstore, EscherOptRecord.RECORD_ID);
} }
HSLFPictureData pict = HSLFPictureData.create(format); EscherBSERecord bse = addNewEscherBseRecord(bstore, format, data, 0);
pict.setData(data); HSLFPictureData pict = HSLFPictureData.createFromImageData(format, bstore, bse, data);
int offset = _hslfSlideShow.addPicture(pict); int offset = _hslfSlideShow.addPicture(pict);
EscherBSERecord bse = new EscherBSERecord();
bse.setRecordId(EscherBSERecord.RECORD_ID);
bse.setOptions((short) (0x0002 | (format.nativeId << 4)));
bse.setSize(pict.getRawData().length + 8);
byte[] uid = HSLFPictureData.getChecksum(data);
bse.setUid(uid);
bse.setBlipTypeMacOS((byte) format.nativeId);
bse.setBlipTypeWin32((byte) format.nativeId);
if (format == PictureType.EMF) {
bse.setBlipTypeMacOS((byte) PictureType.PICT.nativeId);
} else if (format == PictureType.WMF) {
bse.setBlipTypeMacOS((byte) PictureType.PICT.nativeId);
} else if (format == PictureType.PICT) {
bse.setBlipTypeWin32((byte) PictureType.WMF.nativeId);
}
bse.setRef(0);
bse.setOffset(offset); bse.setOffset(offset);
bse.setRemainingData(new byte[0]);
bstore.addChildRecord(bse);
int count = bstore.getChildRecords().size();
bstore.setOptions((short) ((count << 4) | 0xF));
return pict; return pict;
} }
@ -1273,4 +1248,31 @@ public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFSh
public EncryptionInfo getEncryptionInfo() throws IOException { public EncryptionInfo getEncryptionInfo() throws IOException {
return getSlideShowImpl().getEncryptionInfo(); return getSlideShowImpl().getEncryptionInfo();
} }
/**
 * Creates a BSE record describing a picture and appends it to the given Blip Store.
 * <p>
 * The record's size is {@code imageData.length} plus {@link HSLFPictureData#PREAMBLE_SIZE}, and its UID is the
 * first {@link HSLFPictureData#CHECKSUM_SIZE} bytes of {@code imageData} (zero-padded if the array is shorter).
 * After the record is added, the Blip Store's options are rewritten to reflect its new child count.
 *
 * @param blipStore Blip Store that receives the new record.
 * @param type Picture type; determines the record options and the Win32/MacOS blip types.
 * @param imageData Bytes used to derive the record's size and UID.
 * @param offset Offset of the picture in the pictures stream.
 * @return The newly created record, already attached to {@code blipStore}.
 */
static EscherBSERecord addNewEscherBseRecord(EscherContainerRecord blipStore, PictureType type, byte[] imageData, int offset) {
    EscherBSERecord bse = new EscherBSERecord();
    bse.setRecordId(EscherBSERecord.RECORD_ID);
    bse.setOptions((short) (0x0002 | (type.nativeId << 4)));
    bse.setSize(imageData.length + HSLFPictureData.PREAMBLE_SIZE);
    bse.setUid(Arrays.copyOf(imageData, HSLFPictureData.CHECKSUM_SIZE));

    // Both platforms default to the picture's own type; metafile formats name their counterpart on the other
    // platform instead (EMF/WMF -> PICT on MacOS, PICT -> WMF on Win32).
    byte macOsType = (byte) type.nativeId;
    byte win32Type = (byte) type.nativeId;
    if (type == PictureType.EMF || type == PictureType.WMF) {
        macOsType = (byte) PictureType.PICT.nativeId;
    } else if (type == PictureType.PICT) {
        win32Type = (byte) PictureType.WMF.nativeId;
    }
    bse.setBlipTypeMacOS(macOsType);
    bse.setBlipTypeWin32(win32Type);

    bse.setOffset(offset);

    blipStore.addChildRecord(bse);
    // The instance part of the container options carries the number of children.
    int childCount = blipStore.getChildRecords().size();
    blipStore.setOptions((short) ((childCount << 4) | 0xF));

    return bse;
}
} }

View File

@ -30,21 +30,33 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.NavigableMap; import java.util.NavigableMap;
import java.util.Objects;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.apache.poi.POIDocument; import org.apache.poi.POIDocument;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.hpsf.PropertySet; import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException; import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException; import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
import org.apache.poi.hslf.record.CurrentUserAtom; import org.apache.poi.hslf.record.CurrentUserAtom;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.DocumentEncryptionAtom; import org.apache.poi.hslf.record.DocumentEncryptionAtom;
import org.apache.poi.hslf.record.ExOleObjStg; import org.apache.poi.hslf.record.ExOleObjStg;
import org.apache.poi.hslf.record.PersistPtrHolder; import org.apache.poi.hslf.record.PersistPtrHolder;
@ -59,6 +71,7 @@ import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.EntryUtils; import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.usermodel.PictureData;
import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.apache.poi.sl.usermodel.PictureData.PictureType;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
@ -347,23 +360,26 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
* This is lazily called as and when we want to touch pictures. * This is lazily called as and when we want to touch pictures.
*/ */
private void readPictures() throws IOException { private void readPictures() throws IOException {
_pictures = new ArrayList<>();
// if the presentation doesn't contain pictures - will use a null set instead // if the presentation doesn't contain pictures, will use an empty collection instead
if (!getDirectory().hasEntry("Pictures")) { if (!getDirectory().hasEntry("Pictures")) {
_pictures = new ArrayList<>();
return; return;
} }
DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures"); DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures");
DocumentInputStream is = getDirectory().createDocumentInputStream(entry); EscherContainerRecord blipStore = getBlipStore();
byte[] pictstream = IOUtils.toByteArray(is, entry.getSize()); byte[] pictstream;
is.close(); try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) {
pictstream = IOUtils.toByteArray(is, entry.getSize());
}
List<PictureFactory> factories = new ArrayList<>();
try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) { try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) {
int pos = 0; int pos = 0;
// An empty picture record (length 0) will take up 8 bytes // An empty picture record (length 0) will take up 8 bytes
while (pos <= (pictstream.length - 8)) { while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) {
int offset = pos; int offset = pos;
decryptData.decryptPicture(pictstream, offset); decryptData.decryptPicture(pictstream, offset);
@ -388,7 +404,7 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
// (0 is allowed, but odd, since we do wind on by the header each // (0 is allowed, but odd, since we do wind on by the header each
// time, so we won't get stuck) // time, so we won't get stuck)
if (imgsize < 0) { if (imgsize < 0) {
throw new CorruptPowerPointFileException("The file contains a picture, at position " + _pictures.size() + ", which has a negatively sized data length, so we can't trust any of the picture data"); throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
} }
// If the type (including the bonus 0xF018) is 0, skip it // If the type (including the bonus 0xF018) is 0, skip it
@ -404,26 +420,127 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
"in others, this could indicate a corrupt file"); "in others, this could indicate a corrupt file");
break; break;
} }
// Build the PictureData object from the data
try {
HSLFPictureData pict = HSLFPictureData.create(pt);
pict.setSignature(signature);
// Copy the data, ready to pass to PictureData // Copy the data, ready to pass to PictureData
byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH); byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH);
pict.setRawData(imgdata);
pict.setOffset(offset); factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature));
pict.setIndex(_pictures.size() + 1); // index is 1-based
_pictures.add(pict);
} catch (IllegalArgumentException e) {
LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
}
} }
pos += imgsize; pos += imgsize;
} }
} }
matchPicturesAndRecords(factories, blipStore);
List<HSLFPictureData> pictures = new ArrayList<>();
for (PictureFactory it : factories) {
try {
HSLFPictureData pict = it.build();
pict.setIndex(pictures.size() + 1); // index is 1-based
pictures.add(pict);
} catch (IllegalArgumentException e) {
LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!");
}
}
_pictures = pictures;
}
/**
* Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the
* Blip Store for the slideshow.
* <p>
* When reading a slideshow into memory, we have to match the records in the Blip Store with the factories
* representing pictures in the pictures stream. This can be difficult, as presentations might have incorrectly
* formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood
* of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs.
* <p>
* Side effects: every factory in {@code factories} is assigned a record via
* {@link PictureFactory#setRecord(EscherBSERecord)}; records matched only by UID have their offset overwritten with
* the factory's offset; and a new record is added to {@code blipStore} for each factory that could not be matched.
* The {@code factories} list itself is not reordered or shortened (a sorted copy is used internally).
*
* @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream.
* @param blipStore Blip Store of the presentation being loaded.
*/
private static void matchPicturesAndRecords(List<PictureFactory> factories, EscherContainerRecord blipStore) {
// LinkedList because we're sorting and removing.
LinkedList<PictureFactory> unmatchedFactories = new LinkedList<>(factories);
unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset));
// Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a
// matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple
// records share an offset.
Map<Integer, List<EscherBSERecord>> unmatchedRecords = new HashMap<>();
for (EscherRecord child : blipStore) {
// Every child of the Blip Store is treated as a BSE record; any other record type fails the cast.
EscherBSERecord record = (EscherBSERecord) child;
unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record);
}
// The first pass through the factories only pairs a factory with a record if we're very confident that they
// are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched
// factories and records are removed from the unmatched collections.
for (Iterator<PictureFactory> iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) {
PictureFactory factory = iterator.next();
int physicalOffset = factory.getOffset();
List<EscherBSERecord> recordsAtOffset = unmatchedRecords.get(physicalOffset);
if (recordsAtOffset == null || recordsAtOffset.isEmpty()) {
// There are no records that have an offset matching the physical offset in the stream. We'll do
// more complicated and less reliable matching for this factory after all "well known"
// image <-> record pairs have been found.
LOG.atDebug().log("No records with offset {}", box(physicalOffset));
} else if (recordsAtOffset.size() == 1) {
// Only 1 record has the same offset as the target image. Assume these are a pair.
// Note that the UID is not compared in this branch.
factory.setRecord(recordsAtOffset.get(0));
unmatchedRecords.remove(physicalOffset);
iterator.remove();
} else {
// Multiple records share an offset. Perform additional matching based on UID.
// If none of them has a matching UID, the factory stays unmatched and is handled by the second pass.
for (int i = 0; i < recordsAtOffset.size(); i++) {
EscherBSERecord record = recordsAtOffset.get(i);
byte[] recordUid = record.getUid();
// The UID is compared against the first CHECKSUM_SIZE bytes of the picture's stored data.
byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
if (Arrays.equals(recordUid, imageHeader)) {
factory.setRecord(record);
recordsAtOffset.remove(i);
iterator.remove();
break;
}
}
}
}
// At this point, any factories remaining didn't have a record with a matching offset. The second pass
// through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be
// found will get a new record.
List<EscherBSERecord> remainingRecords = unmatchedRecords.values()
.stream()
.flatMap(Collection::stream)
.collect(Collectors.toList());
for (PictureFactory factory : unmatchedFactories) {
boolean matched = false;
// Walk backwards so that removing by index does not skip elements.
// NOTE(review): there is no break after a match, so one factory claims EVERY remaining record with the same
// UID (each gets its offset rewritten), and the factory keeps the last one visited, i.e. the lowest index.
// Confirm this is intended for slideshows containing duplicate pictures.
for (int i = remainingRecords.size() - 1; i >= 0; i--) {
EscherBSERecord record = remainingRecords.get(i);
byte[] recordUid = record.getUid();
byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE);
if (Arrays.equals(recordUid, imageHeader)) {
remainingRecords.remove(i);
factory.setRecord(record);
// The record's stored offset was wrong (no offset match in the first pass); correct it to the physical one.
record.setOffset(factory.getOffset());
matched = true;
}
}
if (!matched) {
// Synthesize a new record
LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset));
EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset);
factory.setRecord(record);
}
}
// Whatever is left are records for which no picture was found in the stream; they are only reported.
LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size()));
} }
/** /**
@ -756,9 +873,8 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
int offset = 0; int offset = 0;
if (_pictures.size() > 0) { if (_pictures.size() > 0) {
HSLFPictureData prev = _pictures.get(_pictures.size() - 1); HSLFPictureData prev = _pictures.get(_pictures.size() - 1);
offset = prev.getOffset() + prev.getRawData().length + 8; offset = prev.getOffset() + prev.getBseSize();
} }
img.setOffset(offset);
img.setIndex(_pictures.size() + 1); // index is 1-based img.setIndex(_pictures.size() + 1); // index is 1-based
_pictures.add(img); _pictures.add(img);
return offset; return offset;
@ -825,6 +941,32 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
return _objects; return _objects;
} }
/**
 * Finds the Blip Store container inside the drawing group of the Document record.
 * <p>
 * If the drawing group has no Blip Store yet, an empty one is created, inserted before the
 * {@link EscherOptRecord} of the drawing group container, and returned.
 *
 * @return The existing or newly attached Blip Store; never {@code null}.
 * @throws CorruptPowerPointFileException if the slideshow has no Document record.
 */
private EscherContainerRecord getBlipStore() {
    // The first record of type Document wins.
    Document document = null;
    for (Record candidate : _records) {
        if (candidate.getRecordType() == RecordTypes.Document.typeID) {
            document = (Document) candidate;
            break;
        }
    }
    if (document == null) {
        throw new CorruptPowerPointFileException("Document record is missing");
    }

    EscherContainerRecord drawingGroup = document.getPPDrawingGroup().getDggContainer();
    EscherContainerRecord existing = HSLFShape.getEscherChild(drawingGroup, EscherContainerRecord.BSTORE_CONTAINER);
    if (existing != null) {
        return existing;
    }

    // No Blip Store yet: create an empty one and attach it to the drawing group.
    EscherContainerRecord created = new EscherContainerRecord();
    created.setRecordId(EscherContainerRecord.BSTORE_CONTAINER);
    drawingGroup.addChildBefore(created, EscherOptRecord.RECORD_ID);
    return created;
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
// only close the filesystem, if we are based on the root node. // only close the filesystem, if we are based on the root node.
@ -903,4 +1045,55 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
return count; return count;
} }
} }
/**
 * Holds everything known about one picture read from the pictures stream until the matching
 * {@link EscherBSERecord} has been determined, at which point the {@link HSLFPictureData} can be built.
 * <p>
 * {@link #matchPicturesAndRecords(List, EscherContainerRecord)} relies heavily on this class.
 */
static final class PictureFactory {
    /** Picture bytes as stored in the slideshow. */
    final byte[] imageData;

    private final PictureData.PictureType type;
    private final EscherContainerRecord recordContainer;
    private final int signature;
    private final int offset;

    /** Assigned after construction through {@link #setRecord(EscherBSERecord)}; required by {@link #build()}. */
    private EscherBSERecord record;

    /**
     * @param recordContainer Record tracking all pictures; must not be {@code null}.
     * @param type Picture type; must not be {@code null}.
     * @param imageData Picture bytes as stored in the slideshow; must not be {@code null}.
     * @param offset Offset of the picture in the pictures stream.
     * @param signature Image format-specific signature.
     */
    PictureFactory(
            EscherContainerRecord recordContainer,
            PictureData.PictureType type,
            byte[] imageData,
            int offset,
            int signature
    ) {
        this.recordContainer = Objects.requireNonNull(recordContainer);
        this.type = Objects.requireNonNull(type);
        this.imageData = Objects.requireNonNull(imageData);
        this.offset = offset;
        this.signature = signature;
    }

    /**
     * @return Offset of the picture in the pictures stream, as given at construction.
     */
    int getOffset() {
        return this.offset;
    }

    /**
     * Assigns the {@link EscherBSERecord} that the built {@link HSLFPictureData} will be linked to.
     *
     * @return This factory, to allow call chaining.
     */
    PictureFactory setRecord(EscherBSERecord bse) {
        this.record = bse;
        return this;
    }

    /**
     * Creates the {@link HSLFPictureData} described by this factory.
     * <p>
     * A record must have been assigned with {@link #setRecord(EscherBSERecord)} beforehand; otherwise a
     * {@link NullPointerException} is thrown.
     */
    HSLFPictureData build() {
        Objects.requireNonNull(record, "Can't build an instance until the record has been assigned.");
        return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature);
    }
}
} }

View File

@ -55,6 +55,7 @@ public abstract class BaseTestPPTIterating {
ENCRYPTED_FILES.add("Password_Protected-np-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-np-hello.ppt");
ENCRYPTED_FILES.add("Password_Protected-56-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-56-hello.ppt");
ENCRYPTED_FILES.add("Password_Protected-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-hello.ppt");
ENCRYPTED_FILES.add("ppt_with_png_encrypted.ppt");
} }
protected static final Map<String,Class<? extends Throwable>> EXCLUDED = protected static final Map<String,Class<? extends Throwable>> EXCLUDED =

View File

@ -35,6 +35,7 @@ import javax.imageio.ImageIO;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.ddf.EscherBSERecord; import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.apache.poi.sl.usermodel.PictureData.PictureType;
import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -87,16 +88,19 @@ public final class TestPicture {
} }
/** /**
* Picture#getEscherBSERecord threw NullPointerException if EscherContainerRecord.BSTORE_CONTAINER * {@link HSLFPictureShape#getEscherBSERecord()} threw {@link NullPointerException} if
* was not found. The correct behaviour is to return null. * {@link EscherContainerRecord#BSTORE_CONTAINER} was not found. The correct behaviour is to return null.
*/ */
@Test @Test
void bug46122() throws IOException { void bug46122() throws IOException {
HSLFPictureData detachedData;
try (HSLFSlideShow ppt = new HSLFSlideShow()) {
detachedData = ppt.addPicture(new byte[0], PictureType.PNG);
}
try (HSLFSlideShow ppt = new HSLFSlideShow()) { try (HSLFSlideShow ppt = new HSLFSlideShow()) {
HSLFSlide slide = ppt.createSlide(); HSLFSlide slide = ppt.createSlide();
HSLFPictureData pd = HSLFPictureData.create(PictureType.PNG);
HSLFPictureShape pict = new HSLFPictureShape(pd); //index to non-existing picture data HSLFPictureShape pict = new HSLFPictureShape(detachedData); //index to non-existing picture data
pict.setAnchor(new Rectangle2D.Double(50, 50, 100, 100)); pict.setAnchor(new Rectangle2D.Double(50, 50, 100, 100));
pict.setSheet(slide); pict.setSheet(slide);
HSLFPictureData data = pict.getPictureData(); HSLFPictureData data = pict.getPictureData();

View File

@ -27,9 +27,14 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Random;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.hslf.HSLFTestDataSamples; import org.apache.poi.hslf.HSLFTestDataSamples;
import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.DIB;
import org.apache.poi.hslf.blip.EMF; import org.apache.poi.hslf.blip.EMF;
@ -37,6 +42,7 @@ import org.apache.poi.hslf.blip.JPEG;
import org.apache.poi.hslf.blip.PICT; import org.apache.poi.hslf.blip.PICT;
import org.apache.poi.hslf.blip.PNG; import org.apache.poi.hslf.blip.PNG;
import org.apache.poi.hslf.blip.WMF; import org.apache.poi.hslf.blip.WMF;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.sl.image.ImageHeaderEMF;
import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.sl.image.ImageHeaderPICT;
import org.apache.poi.sl.image.ImageHeaderWMF; import org.apache.poi.sl.image.ImageHeaderWMF;
@ -512,9 +518,8 @@ public final class TestPictures {
int streamSize = out.size(); int streamSize = out.size();
HSLFPictureData data = HSLFPictureData.create(PictureType.JPEG); HSLFPictureData data = ppt.addPicture(new byte[100], PictureType.JPEG);
data.setData(new byte[100]); int offset = data.getOffset();
int offset = hslf.addPicture(data);
assertEquals(streamSize, offset); assertEquals(streamSize, offset);
assertEquals(3, ppt.getPictureData().size()); assertEquals(3, ppt.getPictureData().size());
@ -560,4 +565,172 @@ public final class TestPictures {
assertEquals(1, picture.getIndex()); assertEquals(1, picture.getIndex());
} }
} }
/**
 * Checks that replacing a picture's content through {@link HSLFPictureData#setData(byte[])} survives a
 * write/read round trip of the slideshow.
 */
@Test
void testEditPictureData() throws IOException {
    byte[] replacement = slTests.readFile("tomcat.png");
    ByteArrayOutputStream saved = new ByteArrayOutputStream();

    // Swap the first picture of an existing slideshow and serialize the result.
    try (HSLFSlideShow original = HSLFTestDataSamples.getSlideShow("ppt_with_png.ppt")) {
        HSLFPictureData first = original.getPictureData().get(0);
        first.setData(replacement);
        original.write(saved);
    }

    // Re-open the serialized slideshow; its first picture must now carry the replacement bytes.
    byte[] serialized = saved.toByteArray();
    try (HSLFSlideShow reloaded = new HSLFSlideShow(new ByteArrayInputStream(serialized))) {
        HSLFPictureData first = reloaded.getPictureData().get(0);
        assertArrayEquals(replacement, first.getData());
    }
}
/**
 * Same round trip as the unencrypted variant, but on a password-protected slideshow: a picture replaced through
 * {@link HSLFPictureData#setData(byte[])} must still be readable after saving and reloading.
 */
@Test
void testEditPictureDataEncrypted() throws IOException {
    byte[] replacement = slTests.readFile("tomcat.png");
    ByteArrayOutputStream saved = new ByteArrayOutputStream();

    // The password stays set for both the load and the reload; it is always cleared afterwards.
    Biff8EncryptionKey.setCurrentUserPassword("password");
    try {
        // Swap the first picture of the encrypted slideshow and serialize the result.
        try (HSLFSlideShow original = HSLFTestDataSamples.getSlideShow("ppt_with_png_encrypted.ppt")) {
            HSLFPictureData first = original.getPictureData().get(0);
            first.setData(replacement);
            original.write(saved);
        }

        // Re-open the serialized slideshow; its first picture must now carry the replacement bytes.
        byte[] serialized = saved.toByteArray();
        try (HSLFSlideShow reloaded = new HSLFSlideShow(new ByteArrayInputStream(serialized))) {
            HSLFPictureData first = reloaded.getPictureData().get(0);
            assertArrayEquals(replacement, first.getData());
        }
    } finally {
        Biff8EncryptionKey.setCurrentUserPassword(null);
    }
}
/**
 * Checks that shrinking the first picture moves the {@link EscherBSERecord#getOffset()} of every picture stored
 * after it, both in the live object model and after the slideshow has been written and read back.
 */
@Test
void testEditPictureDataRecordOffsetsAreShifted() throws IOException {
    final int[] expectedBefore = {0, 12013, 15081, 34162, 59563};
    // Every offset after the first one is 11978 smaller than its counterpart above.
    final int[] expectedAfter = {0, 35, 3103, 22184, 47585};
    final ByteArrayOutputStream saved = new ByteArrayOutputStream();

    try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) {
        // Sanity check: the sample file starts out with the offsets this test was written against.
        assertArrayEquals(
                expectedBefore,
                ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray());

        // Ten zero bytes are not a valid image, which is fine: only the offsets are under test.
        ppt.getPictureData().get(0).setData(new byte[10]);

        // The in-memory picture list must reflect the shift immediately.
        assertArrayEquals(
                expectedAfter,
                ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray());

        ppt.write(saved);
    }

    try (HSLFSlideShow reloaded = new HSLFSlideShow(new ByteArrayInputStream(saved.toByteArray()))) {
        // The shifted offsets must also be what was persisted.
        assertArrayEquals(
                expectedAfter,
                reloaded.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray());
    }
}
/**
 * Verify that the {@link EscherBSERecord#getOffset()} values are modified for all images after the image being
 * changed, but assuming that the records are not stored in a sorted-by-offset fashion.
 * <p>
 * We have not encountered a file that has meaningful data that is not sorted. However, we have encountered files
 * that have records with an offset of 0 interspersed between meaningful records. See {@code 53446.ppt} and
 * {@code alterman_security.ppt} for examples.
 * <p>
 * The records are put out of order by reversing them instead of shuffling them with an unseeded random source.
 * A reversal is identical on every run, so a failure can be reproduced, and for more than one record it can
 * never hand back the original order, which would silently turn this test into a no-op.
 */
@Test
void testEditPictureDataOutOfOrderRecords() throws IOException {
    int[] modifiedOffsets = {0, 35, 3103, 22184, 47585};

    ByteArrayOutputStream inMemory = new ByteArrayOutputStream();
    try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) {
        // For this test we're going to intentionally manipulate the records into a non-sorted order.
        EscherContainerRecord container = ppt.getPictureData().get(0).bStore;
        List<EscherRecord> children = container.getChildRecords();
        for (EscherRecord child : children) {
            container.removeChildRecord(child);
        }
        // Deterministic re-ordering: last record first.
        Collections.reverse(children);
        for (EscherRecord child : children) {
            container.addChildRecord(child);
        }

        HSLFPictureData imageBeingChanged = ppt.getPictureData().get(0);
        // It doesn't matter that this isn't a valid image. We are just testing offsets here.
        imageBeingChanged.setData(new byte[10]);

        // Verify that the in-memory representations have all been updated.
        // The offsets are sorted before comparing, so the order of the picture list does not matter here.
        int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray();
        Arrays.sort(offsets);
        assertArrayEquals(modifiedOffsets, offsets);

        ppt.write(inMemory);
    }

    try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) {
        // Verify that the persisted representations have all been updated
        int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray();
        Arrays.sort(offsets);
        assertArrayEquals(modifiedOffsets, offsets);
    }
}
/**
 * Verify that a slideshow with records that have offsets not matching those of the pictures in the stream still
 * correctly pairs the records and pictures.
 * <p>
 * The bogus offsets come from a pseudo-random generator with a fixed seed, so every run corrupts the records in
 * exactly the same way and a failure can be reproduced.
 */
@Test
void testSlideshowWithIncorrectOffsets() throws IOException {
    int[] originalOffsets;
    int originalNumberOfRecords;

    // Create a presentation that has records with unmatched offsets, but with matched UIDs.
    ByteArrayOutputStream inMemory = new ByteArrayOutputStream();
    try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) {
        originalOffsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray();
        originalNumberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size();

        // Fixed seed: the particular value is arbitrary, it only has to stay the same between runs.
        Random random = new Random(1887017L);
        for (HSLFPictureData picture : ppt.getPictureData()) {
            // Bound is arbitrary and irrelevant to the test.
            picture.bse.setOffset(random.nextInt(500_000));
        }
        ppt.write(inMemory);
    }

    try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) {
        // Verify that the offsets all got fixed.
        int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray();
        assertArrayEquals(originalOffsets, offsets);

        // Verify that there are the same number of records as in the original slideshow.
        int numberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size();
        assertEquals(originalNumberOfRecords, numberOfRecords);
    }
}
} }

Binary file not shown.

Binary file not shown.