diff --git a/src/java/org/apache/poi/sl/usermodel/PictureData.java b/src/java/org/apache/poi/sl/usermodel/PictureData.java index 83b54d75c7..df2ca79b01 100644 --- a/src/java/org/apache/poi/sl/usermodel/PictureData.java +++ b/src/java/org/apache/poi/sl/usermodel/PictureData.java @@ -105,6 +105,10 @@ public interface PictureData { /** * Sets the binary picture data + *

+ * The format of the data must match the format of {@link #getType()}. Failure to match the picture data may result + * in data loss. + * * @param data picture data */ void setData(byte[] data) throws IOException; diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java index afc8cd5481..bb5904da55 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Bitmap.java @@ -20,13 +20,17 @@ package org.apache.poi.hslf.blip; import java.awt.Dimension; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import javax.imageio.ImageIO; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.usermodel.HSLFPictureData; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -35,6 +39,29 @@ import org.apache.poi.util.Units; */ public abstract class Bitmap extends HSLFPictureData { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link Bitmap}. This API led to detached {@link Bitmap} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public Bitmap() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected Bitmap(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ byte[] rawdata = getRawData(); @@ -43,17 +70,22 @@ public abstract class Bitmap extends HSLFPictureData { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); - if (getUIDInstanceCount() == 2) { - out.write(checksum); - } - out.write(0); - out.write(data); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + 1 + data.length]; + int offset = 0; - setRawData(out.toByteArray()); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + + if (getUIDInstanceCount() == 2) { + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + } + + offset++; + System.arraycopy(data, 0, rawData, offset, data.length); + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java b/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java index 8a091bd7ab..1840d55409 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/DIB.java @@ -17,10 +17,13 @@ package org.apache.poi.hslf.blip; -import java.io.IOException; - +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.Removal; /** * Represents a DIB picture data in a PPT file @@ -35,6 +38,29 @@ public final class DIB extends Bitmap { */ private static final int HEADER_SIZE = 14; + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link DIB}. This API led to detached {@link DIB} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public DIB() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public DIB(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public PictureType getType(){ return PictureType.DIB; @@ -100,9 +126,9 @@ public final class DIB extends Bitmap { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { //cut off the bitmap file-header byte[] dib = IOUtils.safelyClone(data, HEADER_SIZE, data.length-HEADER_SIZE, data.length); - super.setData(dib); + return super.formatImageForSlideshow(dib); } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java index ba90372f85..e6bf60da16 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/EMF.java @@ -24,9 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.zip.InflaterInputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -34,6 +39,29 @@ import org.apache.poi.util.Units; */ public final class EMF extends Metafile { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link EMF}. This API led to detached {@link EMF} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public EMF() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public EMF(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ try { @@ -60,11 +88,11 @@ public final class EMF extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { byte[] compressed = compress(data, 0, data.length); ImageHeaderEMF nHeader = new ImageHeaderEMF(data, 0); - + Header header = new Header(); header.setWmfSize(data.length); header.setBounds(nHeader.getBounds()); @@ -73,15 +101,22 @@ public final class EMF extends Metafile { header.setZipSize(compressed.length); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); - if (getUIDInstanceCount() == 2) { - out.write(checksum); - } - header.write(out); - out.write(compressed); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; - setRawData(out.toByteArray()); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + + if (getUIDInstanceCount() == 2) { + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + } + + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java index 08ba6b73e7..cf90746d6b 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/JPEG.java @@ -18,6 +18,12 @@ package org.apache.poi.hslf.blip; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; + /** * Represents a JPEG picture data in a PPT file */ @@ -26,6 +32,29 @@ public final class JPEG extends Bitmap { public enum ColorSpace { rgb, cymk } private ColorSpace colorSpace = ColorSpace.rgb; + + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link JPEG}. This API led to detached {@link JPEG} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public JPEG() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public JPEG(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } @Override public PictureType getType(){ diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java index 6ebd4a0a61..ea855d460e 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/Metafile.java @@ -25,9 +25,15 @@ import java.io.IOException; import java.io.OutputStream; import java.util.zip.DeflaterOutputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.usermodel.HSLFPictureData; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianInputStream; import org.apache.poi.util.LittleEndianOutputStream; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -36,6 +42,29 @@ import org.apache.poi.util.Units; */ public abstract class Metafile extends HSLFPictureData { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link Metafile}. This API led to detached {@link Metafile} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public Metafile() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected Metafile(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + /** * A structure which represents a 34-byte header preceding the compressed metafile data */ @@ -117,6 +146,44 @@ public abstract class Metafile extends HSLFPictureData { leos.writeByte(filter); } + void write(byte[] destination, int offset) { + //hmf + LittleEndian.putInt(destination, offset, wmfsize); + offset += 4; + + //left + LittleEndian.putInt(destination, offset, bounds.x); + offset += 4; + + //top + LittleEndian.putInt(destination, offset, bounds.y); + offset += 4; + + //right + LittleEndian.putInt(destination, offset, bounds.x + bounds.width); + offset += 4; + + //bottom + LittleEndian.putInt(destination, offset, bounds.y + bounds.height); + offset += 4; + + //inch + LittleEndian.putInt(destination, offset, size.width); + offset += 4; + + //inch + LittleEndian.putInt(destination, offset, size.height); + offset += 4; + + LittleEndian.putInt(destination, offset, zipsize); + offset += 4; + + destination[offset] = (byte) compression; + offset++; + + destination[offset] = (byte) filter; + } + public int getSize(){ return 34; } @@ -146,11 +213,16 @@ public abstract class Metafile extends HSLFPictureData { } } - protected static byte[] compress(byte[] bytes, int offset, int length) throws IOException { + protected static byte[] compress(byte[] bytes, int offset, int length) { ByteArrayOutputStream out = new ByteArrayOutputStream(); - DeflaterOutputStream deflater = new DeflaterOutputStream( out ); - deflater.write(bytes, offset, length); - deflater.close(); + try (DeflaterOutputStream deflater = new DeflaterOutputStream(out)) { + deflater.write(bytes, offset, length); + } catch (IOException e) { + // IOException won't get thrown by the DeflaterOutputStream in this configuration because: + // 1. ByteArrayOutputStream doesn't throw an IOException during writes. + // 2. The DeflaterOutputStream is not finished until we're done writing. + throw new AssertionError("Won't happen", e); + } return out.toByteArray(); } diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java index 33dcde3b0e..a6c2765ec7 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PICT.java @@ -26,9 +26,14 @@ import java.util.zip.InflaterInputStream; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; import static org.apache.logging.log4j.util.Unbox.box; @@ -39,6 +44,28 @@ import static org.apache.logging.log4j.util.Unbox.box; public final class PICT extends Metafile { private static final Logger LOG = LogManager.getLogger(PICT.class); + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link PICT}. This API led to detached {@link PICT} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public PICT() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public PICT(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } @Override public byte[] getData(){ @@ -93,7 +120,7 @@ public final class PICT extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { // skip the first 512 bytes - they are MAC specific crap final int nOffset = ImageHeaderPICT.PICT_HEADER_OFFSET; ImageHeaderPICT nHeader = new ImageHeaderPICT(data, nOffset); @@ -108,15 +135,22 @@ public final class PICT extends Metafile { header.setDimension(new Dimension(Units.toEMU(nDim.getWidth()), Units.toEMU(nDim.getHeight()))); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); - if (getUIDInstanceCount() == 2) { - out.write(checksum); - } - header.write(out); - out.write(compressed); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; - setRawData(out.toByteArray()); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + + if (getUIDInstanceCount() == 2) { + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + } + + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java index 8e05aa7611..197cb3d828 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/PNG.java @@ -17,13 +17,41 @@ package org.apache.poi.hslf.blip; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderPNG; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; /** * Represents a PNG picture data in a PPT file */ public final class PNG extends Bitmap { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link PNG}. This API led to detached {@link PNG} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public PNG() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public PNG(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData() { return new ImageHeaderPNG(super.getData()).extractPNG(); diff --git a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java index 1d00322911..55b74bdb6f 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java +++ b/src/scratchpad/src/org/apache/poi/hslf/blip/WMF.java @@ -24,9 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.zip.InflaterInputStream; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.hslf.exceptions.HSLFException; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; import org.apache.poi.sl.image.ImageHeaderWMF; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -34,6 +39,29 @@ import org.apache.poi.util.Units; */ public final class WMF extends Metafile { + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link WMF}. This API led to detached {@link WMF} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public WMF() { + this(new EscherContainerRecord(), new EscherBSERecord()); + } + + /** + * Creates a new instance. + * + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + public WMF(EscherContainerRecord recordContainer, EscherBSERecord bse) { + super(recordContainer, bse); + } + @Override public byte[] getData(){ try { @@ -64,7 +92,7 @@ public final class WMF extends Metafile { } @Override - public void setData(byte[] data) throws IOException { + protected byte[] formatImageForSlideshow(byte[] data) { int pos = 0; ImageHeaderWMF nHeader = new ImageHeaderWMF(data, pos); pos += nHeader.getLength(); @@ -79,15 +107,22 @@ public final class WMF extends Metafile { header.setZipSize(compressed.length); byte[] checksum = getChecksum(data); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - out.write(checksum); - if (getUIDInstanceCount() == 2) { - out.write(checksum); - } - header.write(out); - out.write(compressed); + byte[] rawData = new byte[checksum.length * getUIDInstanceCount() + header.getSize() + compressed.length]; + int offset = 0; - setRawData(out.toByteArray()); + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + + if (getUIDInstanceCount() == 2) { + System.arraycopy(checksum, 0, rawData, offset, checksum.length); + offset += checksum.length; + } + + header.write(rawData, offset); + offset += header.getSize(); + System.arraycopy(compressed, 0, rawData, offset, compressed.length); + + return rawData; } @Override diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java b/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java index 6b13a07920..4e431976a3 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java @@ -173,7 +173,6 @@ public final class PPTXMLDump { return; } - byte[] pictdata = IOUtils.safelyClone(data, pos + PICT_HEADER_SIZE, size, MAX_RECORD_LENGTH); pos += PICT_HEADER_SIZE + size; padding++; @@ -183,7 +182,7 @@ public final class PPTXMLDump { dump(out, header, 0, header.length, padding, true); write(out, "" + CR, padding); write(out, "" + CR, padding); - dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true); + dump(out, data, 0, Math.min(size, 100), padding, true); write(out, "" + CR, padding); padding--; write(out, "" + CR, padding); diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java index 7fa9323209..5275669b76 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFFill.java @@ -570,7 +570,9 @@ public final class HSLFFill { } else { EscherBSERecord bse = (EscherBSERecord)lst.get(idx - 1); for (HSLFPictureData pd : pict) { - if (pd.getOffset() == bse.getOffset()){ + + // Reference equals is safe because these BSE belong to the same slideshow + if (pd.bse == bse) { return pd; } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java index 2f3371d714..5b418f98c3 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureData.java @@ -23,11 +23,18 @@ import java.io.OutputStream; import java.security.MessageDigest; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.Supplier; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.poi.common.usermodel.GenericRecord; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.ddf.EscherRecordTypes; import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.EMF; @@ -38,8 +45,10 @@ import org.apache.poi.hslf.blip.WMF; import org.apache.poi.poifs.crypt.CryptoFunctions; import org.apache.poi.poifs.crypt.HashAlgorithm; import org.apache.poi.sl.usermodel.PictureData; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; +import org.apache.poi.util.Removal; import org.apache.poi.util.Units; /** @@ -47,19 +56,37 @@ import org.apache.poi.util.Units; */ public abstract class HSLFPictureData implements PictureData, GenericRecord { + private static final Logger LOGGER = LogManager.getLogger(HSLFPictureData.class); + /** * Size of the image checksum calculated using MD5 algorithm. */ protected static final int CHECKSUM_SIZE = 16; /** - * Binary data of the picture - */ - private byte[] rawdata; - /** - * The offset to the picture in the stream + * Size of the image preamble in bytes. + *

+ * The preamble describes how the image should be decoded. All image types have the same preamble format. The + * preamble has little endian encoding. Below is a diagram of the preamble contents. + * + *

+     *  0               1               2               3
+     *  0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
+     * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     * |           Signature           |          Picture Type         |
+     * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     * |                       Formatted Length                        |
+     * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     * 
*/ - private int offset; + static final int PREAMBLE_SIZE = 8; + + /** + * Binary data of the picture, formatted as it will be stored in the {@link HSLFSlideShow}. + *

+ * This does not include the {@link #PREAMBLE_SIZE preamble}. + */ + private byte[] formattedData; /** * The instance type/signatures defines if one or two UID instances will be included @@ -71,6 +98,43 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { */ private int index = -1; + /** + * {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached to the + * slideshow that this picture is linked to. + */ + final EscherContainerRecord bStore; + + /** + * Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + final EscherBSERecord bse; + + /** + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla + * 46122) and prevented adding additional functionality. + */ + @Deprecated + @Removal(version = "5.3") + public HSLFPictureData() { + this(new EscherContainerRecord(), new EscherBSERecord()); + LOGGER.atWarn().log("The no-arg constructor is deprecated. Some functionality such as updating pictures won't " + + "work."); + } + + /** + * Creates a new instance. + * + * @param bStore {@link EscherRecordTypes#BSTORE_CONTAINER BStore} record tracking all pictures. Should be attached + * to the slideshow that this picture is linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + */ + @Internal + protected HSLFPictureData(EscherContainerRecord bStore, EscherBSERecord bse) { + this.bStore = Objects.requireNonNull(bStore); + this.bse = Objects.requireNonNull(bse); + } + /** * Blip signature. */ @@ -95,17 +159,34 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { } /** - * Returns the raw binary data of this Picture excluding the first 8 bytes - * which hold image signature and size of the image data. + * Returns the formatted, binary data of this picture excluding the {@link #PREAMBLE_SIZE preamble} bytes. + *

+ * Primarily intended for internal POI use. Use {@link #getData()} to retrieve the picture represented by this + * object. * - * @return picture data + * @return Picture data formatted for the HSLF format. + * @see #getData() + * @see #formatImageForSlideshow(byte[]) */ public byte[] getRawData(){ - return rawdata; + return formattedData; } + /** + * Sets the formatted data for this picture. + *

+ * Primarily intended for internal POI use. Use {@link #setData(byte[])} to change the picture represented by this + * object. + * + * @param data Picture data formatted for the HSLF format. Excludes the {@link #PREAMBLE_SIZE preamble}. + * @see #setData(byte[]) + * @see #formatImageForSlideshow(byte[]) + * @deprecated Set image data using {@link #setData(byte[])}. + */ + @Deprecated + @Removal(version = "5.3") public void setRawData(byte[] data){ - rawdata = (data == null) ? null : data.clone(); + formattedData = (data == null) ? null : data.clone(); } /** @@ -114,7 +195,7 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * @return offset in the 'Pictures' stream */ public int getOffset(){ - return offset; + return bse.getOffset(); } /** @@ -122,21 +203,25 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * We need to set it when a new picture is created. * * @param offset in the 'Pictures' stream + * @deprecated This function was only intended for POI internal use. If you have a use case you're concerned about, + * please open an issue in the POI issue tracker. */ + @Deprecated + @Removal(version = "5.3") public void setOffset(int offset){ - this.offset = offset; + LOGGER.atWarn().log("HSLFPictureData#setOffset is deprecated."); } /** * Returns 16-byte checksum of this picture */ public byte[] getUID(){ - return Arrays.copyOf(rawdata, 16); + return Arrays.copyOf(formattedData, CHECKSUM_SIZE); } @Override public byte[] getChecksum() { - return getChecksum(getData()); + return getUID(); } /** @@ -173,25 +258,105 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { } /** - * Create an instance of PictureData by type. + * Create an instance of {@link HSLFPictureData} by type. * - * @param type type of the picture data. - * Must be one of the static constants defined in the Picture class. - * @return concrete instance of PictureData + * @param type type of picture. + * @return concrete instance of {@link HSLFPictureData}. + * @deprecated Use {@link HSLFSlideShow#addPicture(byte[], PictureType)} or one of it's overloads to create new + * {@link HSLFPictureData}. This API led to detached {@link HSLFPictureData} instances (See Bugzilla + * 46122) and prevented adding additional functionality. */ + @Deprecated + @Removal(version = "5.3") public static HSLFPictureData create(PictureType type){ - HSLFPictureData pict; - switch (type){ - case EMF: pict = new EMF(); break; - case WMF: pict = new WMF(); break; - case PICT: pict = new PICT(); break; - case JPEG: pict = new JPEG(); break; - case PNG: pict = new PNG(); break; - case DIB: pict = new DIB(); break; + LOGGER.atWarn().log("HSLFPictureData#create(PictureType) is deprecated. Some functionality such " + + "as updating pictures won't work."); + + // This record code is a stub. It exists only for API compatibility. + EscherContainerRecord record = new EscherContainerRecord(); + EscherBSERecord bse = new EscherBSERecord(); + return new HSLFSlideShowImpl.PictureFactory(record, type, new byte[0], 0, 0) + .setRecord(bse) + .build(); + } + + /** + * Creates a new instance of the given image type using data already formatted for storage inside the slideshow. + *

+ * This function is most handy when parsing an existing slideshow, as the picture data are already formatted. + * @param type Image type. + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + * @param data Image data formatted for storage in the slideshow. This does not include the + * {@link #PREAMBLE_SIZE preamble}. + * @param signature Image format-specific signature. See subclasses for signature details. + * @return New instance. + * + * @see #createFromImageData(PictureType, EscherContainerRecord, EscherBSERecord, byte[]) + */ + static HSLFPictureData createFromSlideshowData( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse, + byte[] data, + int signature + ) { + HSLFPictureData instance = newInstance(type, recordContainer, bse); + instance.setSignature(signature); + instance.formattedData = data; + return instance; + } + + /** + * Creates a new instance of the given image type using data already formatted for storage inside the slideshow. + *

+ * This function is most handy when adding new pictures to a slideshow, as the image data provided by users is not + * yet formatted. + * + * @param type Image type. + * @param recordContainer Record tracking all pictures. Should be attached to the slideshow that this picture is + * linked to. + * @param bse Record referencing this picture. Should be attached to the slideshow that this picture is linked to. + * @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to + * render the image. + * @return New instance. + * + * @see #createFromSlideshowData(PictureType, EscherContainerRecord, EscherBSERecord, byte[], int) + * @see #setData(byte[]) + */ + static HSLFPictureData createFromImageData( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse, + byte[] data + ) { + HSLFPictureData instance = newInstance(type, recordContainer, bse); + instance.formattedData = instance.formatImageForSlideshow(data); + return instance; + } + + private static HSLFPictureData newInstance( + PictureType type, + EscherContainerRecord recordContainer, + EscherBSERecord bse + ) { + switch (type) { + case EMF: + return new EMF(recordContainer, bse); + case WMF: + return new WMF(recordContainer, bse); + case PICT: + return new PICT(recordContainer, bse); + case JPEG: + return new JPEG(recordContainer, bse); + case PNG: + return new PNG(recordContainer, bse); + case DIB: + return new DIB(recordContainer, bse); default: throw new IllegalArgumentException("Unsupported picture type: " + type); } - return pict; } /** @@ -204,14 +369,15 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { * @return the 24 byte header which preceeds the actual picture data. */ public byte[] getHeader() { - byte[] header = new byte[16 + 8]; + byte[] header = new byte[CHECKSUM_SIZE + PREAMBLE_SIZE]; LittleEndian.putInt(header, 0, getSignature()); LittleEndian.putInt(header, 4, getRawData().length); - System.arraycopy(rawdata, 0, header, 8, 16); + System.arraycopy(formattedData, 0, header, PREAMBLE_SIZE, CHECKSUM_SIZE); return header; } /** + * Returns the 1-based index of this picture. * @return the 1-based index of this pictures within the pictures stream */ public int getIndex() { @@ -225,6 +391,71 @@ public abstract class HSLFPictureData implements PictureData, GenericRecord { this.index = index; } + /** + * Formats the picture data for storage in the slideshow. + *

+ * Images stored in {@link HSLFSlideShow}s are represented differently than when they are standalone files. The + * exact formatting differs for each image type. + * + * @param data Original image data. If these bytes were written to a disk, a common image viewer would be able to + * render the image. + * @return Formatted image representation. + */ + protected abstract byte[] formatImageForSlideshow(byte[] data); + + /** + * @return Size of this picture when stored in the image stream inside the {@link HSLFSlideShow}. + */ + int getBseSize() { + return formattedData.length + PREAMBLE_SIZE; + } + + @Override + public final void setData(byte[] data) throws IOException { + /* + * When working with slideshow pictures, we need to be aware of 2 container units. The first is a list of + * HSLFPictureData that are the programmatic reference for working with the pictures. The second is the + * Blip Store. For the purposes of this function, you can think of the Blip Store as containing a list of + * pointers (with a small summary) to the picture in the slideshow. + * + * When updating a picture, we need to update the in-memory data structure (this instance), but we also need to + * update the stored pointer. When modifying the pointer, we also need to modify all subsequent pointers, since + * they might shift based on a change in the byte count of the underlying image. + */ + int oldSize = getBseSize(); + formattedData = formatImageForSlideshow(data); + int newSize = getBseSize(); + int changeInSize = newSize - oldSize; + byte[] newUid = getUID(); + + boolean foundBseForOldImage = false; + + // Get the BSE records & sort the list by offset, so we can proceed to shift offsets + @SuppressWarnings("unchecked") // The BStore only contains BSE records + List bseRecords = (List) (Object) bStore.getChildRecords(); + bseRecords.sort(Comparator.comparingInt(EscherBSERecord::getOffset)); + + for (EscherBSERecord bse : bseRecords) { + + if (foundBseForOldImage) { + + // The BSE for this picture was modified in a previous iteration, and we are now adjusting + // subsequent offsets. + bse.setOffset(bse.getOffset() + changeInSize); + + } else if (bse == this.bse) { // Reference equals is safe because these BSE belong to the same slideshow + + // This BSE matches the current image. Update the size and UID. + foundBseForOldImage = true; + + bse.setUid(newUid); + + // Image byte count may have changed, so update the pointer. + bse.setSize(newSize); + } + } + } + @Override public final String getContentType() { return getType().contentType; diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java index 49921dc81b..548fdff3d1 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFPictureShape.java @@ -125,7 +125,9 @@ public class HSLFPictureShape extends HSLFSimpleShape implements PictureShape(); - // if the presentation doesn't contain pictures - will use a null set instead + // if the presentation doesn't contain pictures, will use an empty collection instead if (!getDirectory().hasEntry("Pictures")) { + _pictures = new ArrayList<>(); return; } DocumentEntry entry = (DocumentEntry) getDirectory().getEntry("Pictures"); - DocumentInputStream is = getDirectory().createDocumentInputStream(entry); - byte[] pictstream = IOUtils.toByteArray(is, entry.getSize()); - is.close(); + EscherContainerRecord blipStore = getBlipStore(); + byte[] pictstream; + try (DocumentInputStream is = getDirectory().createDocumentInputStream(entry)) { + pictstream = IOUtils.toByteArray(is, entry.getSize()); + } + List factories = new ArrayList<>(); try (HSLFSlideShowEncrypted decryptData = new HSLFSlideShowEncrypted(getDocumentEncryptionAtom())) { int pos = 0; // An empty picture record (length 0) will take up 8 bytes - while (pos <= (pictstream.length - 8)) { + while (pos <= (pictstream.length - HSLFPictureData.PREAMBLE_SIZE)) { int offset = pos; decryptData.decryptPicture(pictstream, offset); @@ -388,7 +404,7 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { // (0 is allowed, but odd, since we do wind on by the header each // time, so we won't get stuck) if (imgsize < 0) { - throw new CorruptPowerPointFileException("The file contains a picture, at position " + _pictures.size() + ", which has a negatively sized data length, so we can't trust any of the picture data"); + throw new CorruptPowerPointFileException("The file contains a picture, at position " + factories.size() + ", which has a negatively sized data length, so we can't trust any of the picture data"); } // If the type (including the bonus 0xF018) is 0, skip it @@ -404,26 +420,127 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { "in others, this could indicate a corrupt file"); break; } - // Build the PictureData object from the data - try { - HSLFPictureData pict = HSLFPictureData.create(pt); - pict.setSignature(signature); - // Copy the data, ready to pass to PictureData - byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH); - pict.setRawData(imgdata); + // Copy the data, ready to pass to PictureData + byte[] imgdata = IOUtils.safelyClone(pictstream, pos, imgsize, MAX_RECORD_LENGTH); - pict.setOffset(offset); - pict.setIndex(_pictures.size() + 1); // index is 1-based - _pictures.add(pict); - } catch (IllegalArgumentException e) { - LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!"); - } + factories.add(new PictureFactory(blipStore, pt, imgdata, offset, signature)); } pos += imgsize; } } + + matchPicturesAndRecords(factories, blipStore); + + List pictures = new ArrayList<>(); + for (PictureFactory it : factories) { + try { + HSLFPictureData pict = it.build(); + + pict.setIndex(pictures.size() + 1); // index is 1-based + pictures.add(pict); + } catch (IllegalArgumentException e) { + LOG.atError().withThrowable(e).log("Problem reading picture. Your document will probably become corrupted if you save it!"); + } + } + + _pictures = pictures; + } + + /** + * Matches all of the {@link PictureFactory PictureFactories} for a slideshow with {@link EscherBSERecord}s in the + * Blip Store for the slideshow. + *

+ * When reading a slideshow into memory, we have to match the records in the Blip Store with the factories + * representing picture in the pictures stream. This can be difficult, as presentations might have incorrectly + * formatted data. This function attempts to perform matching using multiple heuristics to increase the likelihood + * of finding all pairs, while aiming to reduce the likelihood of associating incorrect pairs. + * + * @param factories Factories for creating {@link HSLFPictureData} out of the pictures stream. + * @param blipStore Blip Store of the presentation being loaded. + */ + private static void matchPicturesAndRecords(List factories, EscherContainerRecord blipStore) { + // LinkedList because we're sorting and removing. + LinkedList unmatchedFactories = new LinkedList<>(factories); + unmatchedFactories.sort(Comparator.comparingInt(PictureFactory::getOffset)); + + // Arrange records by offset. In the common case of a well-formed slideshow, where every factory has a + // matching record, this is somewhat wasteful, but is necessary to handle the uncommon case where multiple + // records share an offset. + Map> unmatchedRecords = new HashMap<>(); + for (EscherRecord child : blipStore) { + EscherBSERecord record = (EscherBSERecord) child; + unmatchedRecords.computeIfAbsent(record.getOffset(), k -> new ArrayList<>()).add(record); + } + + // The first pass through the factories only pairs a factory with a record if we're very confident that they + // are a match. Confidence comes from a perfect match on the offset, and if necessary, the UID. Matched + // factories and records are removed from the unmatched collections. + for (Iterator iterator = unmatchedFactories.iterator(); iterator.hasNext(); ) { + PictureFactory factory = iterator.next(); + int physicalOffset = factory.getOffset(); + List recordsAtOffset = unmatchedRecords.get(physicalOffset); + + if (recordsAtOffset == null || recordsAtOffset.isEmpty()) { + // There are no records that have an offset matching the physical offset in the stream. We'll do + // more complicated and less reliable matching for this factory after all "well known" + // image <-> record pairs have been found. + LOG.atDebug().log("No records with offset {}", box(physicalOffset)); + } else if (recordsAtOffset.size() == 1) { + // Only 1 record has the same offset as the target image. Assume these are a pair. + factory.setRecord(recordsAtOffset.get(0)); + unmatchedRecords.remove(physicalOffset); + iterator.remove(); + } else { + + // Multiple records share an offset. Perform additional matching based on UID. + for (int i = 0; i < recordsAtOffset.size(); i++) { + EscherBSERecord record = recordsAtOffset.get(i); + byte[] recordUid = record.getUid(); + byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE); + if (Arrays.equals(recordUid, imageHeader)) { + factory.setRecord(record); + recordsAtOffset.remove(i); + iterator.remove(); + break; + } + } + } + } + + // At this point, any factories remaining didn't have a record with a matching offset. The second pass + // through the factories pairs based on the UID. Factories for which a record with a matching UID cannot be + // found will get a new record. + List remainingRecords = unmatchedRecords.values() + .stream() + .flatMap(Collection::stream) + .collect(Collectors.toList()); + + for (PictureFactory factory : unmatchedFactories) { + + boolean matched = false; + for (int i = remainingRecords.size() - 1; i >= 0; i--) { + EscherBSERecord record = remainingRecords.get(i); + byte[] recordUid = record.getUid(); + byte[] imageHeader = Arrays.copyOf(factory.imageData, HSLFPictureData.CHECKSUM_SIZE); + if (Arrays.equals(recordUid, imageHeader)) { + remainingRecords.remove(i); + factory.setRecord(record); + record.setOffset(factory.getOffset()); + matched = true; + } + } + + if (!matched) { + // Synthesize a new record + LOG.atDebug().log("No record found for picture at offset {}", box(factory.offset)); + EscherBSERecord record = HSLFSlideShow.addNewEscherBseRecord(blipStore, factory.type, factory.imageData, factory.offset); + factory.setRecord(record); + } + } + + LOG.atDebug().log("Found {} unmatched records.", box(remainingRecords.size())); } /** @@ -756,9 +873,8 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { int offset = 0; if (_pictures.size() > 0) { HSLFPictureData prev = _pictures.get(_pictures.size() - 1); - offset = prev.getOffset() + prev.getRawData().length + 8; + offset = prev.getOffset() + prev.getBseSize(); } - img.setOffset(offset); img.setIndex(_pictures.size() + 1); // index is 1-based _pictures.add(img); return offset; @@ -825,6 +941,32 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { return _objects; } + private EscherContainerRecord getBlipStore() { + Document documentRecord = null; + for (Record record : _records) { + if (record.getRecordType() == RecordTypes.Document.typeID) { + documentRecord = (Document) record; + break; + } + } + + if (documentRecord == null) { + throw new CorruptPowerPointFileException("Document record is missing"); + } + + EscherContainerRecord blipStore; + + EscherContainerRecord dggContainer = documentRecord.getPPDrawingGroup().getDggContainer(); + blipStore = HSLFShape.getEscherChild(dggContainer, EscherContainerRecord.BSTORE_CONTAINER); + if (blipStore == null) { + blipStore = new EscherContainerRecord(); + blipStore.setRecordId(EscherContainerRecord.BSTORE_CONTAINER); + + dggContainer.addChildBefore(blipStore, EscherOptRecord.RECORD_ID); + } + return blipStore; + } + @Override public void close() throws IOException { // only close the filesystem, if we are based on the root node. @@ -903,4 +1045,55 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable { return count; } } + + /** + * Assists in creating {@link HSLFPictureData} when parsing a slideshow. + * + * This class is relied upon heavily by {@link #matchPicturesAndRecords(List, EscherContainerRecord)}. + */ + static final class PictureFactory { + final byte[] imageData; + + private final EscherContainerRecord recordContainer; + private final PictureData.PictureType type; + private final int offset; + private final int signature; + private EscherBSERecord record; + + PictureFactory( + EscherContainerRecord recordContainer, + PictureData.PictureType type, + byte[] imageData, + int offset, + int signature + ) { + this.recordContainer = Objects.requireNonNull(recordContainer); + this.type = Objects.requireNonNull(type); + this.imageData = Objects.requireNonNull(imageData); + this.offset = offset; + this.signature = signature; + } + + int getOffset() { + return offset; + } + + /** + * Constructs a new {@link HSLFPictureData}. + *

+ * The {@link EscherBSERecord} must have been set via {@link #setRecord(EscherBSERecord)} prior to invocation. + */ + HSLFPictureData build() { + Objects.requireNonNull(record, "Can't build an instance until the record has been assigned."); + return HSLFPictureData.createFromSlideshowData(type, recordContainer, record, imageData, signature); + } + + /** + * Sets the {@link EscherBSERecord} with which this factory should create a {@link HSLFPictureData}. + */ + PictureFactory setRecord(EscherBSERecord bse) { + record = bse; + return this; + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java b/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java index 869b96e4b8..6722f00f41 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/dev/BaseTestPPTIterating.java @@ -55,6 +55,7 @@ public abstract class BaseTestPPTIterating { ENCRYPTED_FILES.add("Password_Protected-np-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-56-hello.ppt"); ENCRYPTED_FILES.add("Password_Protected-hello.ppt"); + ENCRYPTED_FILES.add("ppt_with_png_encrypted.ppt"); } protected static final Map> EXCLUDED = diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java index e514f78835..55a50be321 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPicture.java @@ -35,6 +35,7 @@ import javax.imageio.ImageIO; import org.apache.poi.POIDataSamples; import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -87,16 +88,19 @@ public final class TestPicture { } /** - * Picture#getEscherBSERecord threw NullPointerException if EscherContainerRecord.BSTORE_CONTAINER - * was not found. The correct behaviour is to return null. + * {@link HSLFPictureShape#getEscherBSERecord()} threw {@link NullPointerException} if + * {@link EscherContainerRecord#BSTORE_CONTAINER} was not found. The correct behaviour is to return null. */ @Test void bug46122() throws IOException { + HSLFPictureData detachedData; + try (HSLFSlideShow ppt = new HSLFSlideShow()) { + detachedData = ppt.addPicture(new byte[0], PictureType.PNG); + } try (HSLFSlideShow ppt = new HSLFSlideShow()) { HSLFSlide slide = ppt.createSlide(); - HSLFPictureData pd = HSLFPictureData.create(PictureType.PNG); - HSLFPictureShape pict = new HSLFPictureShape(pd); //index to non-existing picture data + HSLFPictureShape pict = new HSLFPictureShape(detachedData); //index to non-existing picture data pict.setAnchor(new Rectangle2D.Double(50, 50, 100, 100)); pict.setSheet(slide); HSLFPictureData data = pict.getPictureData(); diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java index fce2e1804d..40076c758f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestPictures.java @@ -27,9 +27,14 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.net.URL; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Random; import org.apache.poi.POIDataSamples; +import org.apache.poi.ddf.EscherBSERecord; +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherRecord; import org.apache.poi.hslf.HSLFTestDataSamples; import org.apache.poi.hslf.blip.DIB; import org.apache.poi.hslf.blip.EMF; @@ -37,6 +42,7 @@ import org.apache.poi.hslf.blip.JPEG; import org.apache.poi.hslf.blip.PICT; import org.apache.poi.hslf.blip.PNG; import org.apache.poi.hslf.blip.WMF; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.sl.image.ImageHeaderEMF; import org.apache.poi.sl.image.ImageHeaderPICT; import org.apache.poi.sl.image.ImageHeaderWMF; @@ -512,9 +518,8 @@ public final class TestPictures { int streamSize = out.size(); - HSLFPictureData data = HSLFPictureData.create(PictureType.JPEG); - data.setData(new byte[100]); - int offset = hslf.addPicture(data); + HSLFPictureData data = ppt.addPicture(new byte[100], PictureType.JPEG); + int offset = data.getOffset(); assertEquals(streamSize, offset); assertEquals(3, ppt.getPictureData().size()); @@ -560,4 +565,172 @@ public final class TestPictures { assertEquals(1, picture.getIndex()); } } + + /** + * Verify that it is possible for a user to change the contents of a {@link HSLFPictureData} using + * {@link HSLFPictureData#setData(byte[])}, and that the changes are saved to the slideshow. + */ + @Test + void testEditPictureData() throws IOException { + byte[] newImage = slTests.readFile("tomcat.png"); + ByteArrayOutputStream modifiedSlideShow = new ByteArrayOutputStream(); + + // Load an existing slideshow and modify the image + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("ppt_with_png.ppt")) { + HSLFPictureData picture = ppt.getPictureData().get(0); + picture.setData(newImage); + ppt.write(modifiedSlideShow); + } + + // Load the modified slideshow and verify the image content + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(modifiedSlideShow.toByteArray()))) { + HSLFPictureData picture = ppt.getPictureData().get(0); + byte[] modifiedImageData = picture.getData(); + assertArrayEquals(newImage, modifiedImageData); + } + } + + /** + * Verify that it is possible for a user to change the contents of an encrypted {@link HSLFPictureData} using + * {@link HSLFPictureData#setData(byte[])}, and that the changes are saved to the slideshow. + */ + @Test + void testEditPictureDataEncrypted() throws IOException { + byte[] newImage = slTests.readFile("tomcat.png"); + ByteArrayOutputStream modifiedSlideShow = new ByteArrayOutputStream(); + + Biff8EncryptionKey.setCurrentUserPassword("password"); + try { + // Load an existing slideshow and modify the image + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("ppt_with_png_encrypted.ppt")) { + HSLFPictureData picture = ppt.getPictureData().get(0); + picture.setData(newImage); + ppt.write(modifiedSlideShow); + } + + // Load the modified slideshow and verify the image content + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(modifiedSlideShow.toByteArray()))) { + HSLFPictureData picture = ppt.getPictureData().get(0); + byte[] modifiedImageData = picture.getData(); + assertArrayEquals(newImage, modifiedImageData); + } + } finally { + Biff8EncryptionKey.setCurrentUserPassword(null); + } + } + + /** + * Verify that the {@link EscherBSERecord#getOffset()} values are modified for all images after the image being + * changed. + */ + @Test + void testEditPictureDataRecordOffsetsAreShifted() throws IOException { + int[] originalOffsets = {0, 12013, 15081, 34162, 59563}; + int[] modifiedOffsets = {0, 35, 3103, 22184, 47585}; + + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(originalOffsets, offsets); + + HSLFPictureData imageBeingChanged = ppt.getPictureData().get(0); + // It doesn't matter that this isn't a valid image. We are just testing offsets here. + imageBeingChanged.setData(new byte[10]); + + // Verify that the in-memory representations have all been updated + offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(modifiedOffsets, offsets); + + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the persisted representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(modifiedOffsets, offsets); + } + } + + /** + * Verify that the {@link EscherBSERecord#getOffset()} values are modified for all images after the image being + * changed, but assuming that the records are not stored in a sorted-by-offset fashion. + * + * We have not encountered a file that has meaningful data that is not sorted. However, we have encountered files + * that have records with an offset of 0 interspersed between meaningful records. See {@code 53446.ppt} and + * {@code alterman_security.ppt} for examples. + */ + @Test + void testEditPictureDataOutOfOrderRecords() throws IOException { + int[] modifiedOffsets = {0, 35, 3103, 22184, 47585}; + + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + + // For this test we're going to intentionally manipulate the records into a shuffled order. + EscherContainerRecord container = ppt.getPictureData().get(0).bStore; + List children = container.getChildRecords(); + for (EscherRecord child : children) { + container.removeChildRecord(child); + } + Collections.shuffle(children); + for (EscherRecord child : children) { + container.addChildRecord(child); + } + + HSLFPictureData imageBeingChanged = ppt.getPictureData().get(0); + // It doesn't matter that this isn't a valid image. We are just testing offsets here. + imageBeingChanged.setData(new byte[10]); + + // Verify that the in-memory representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + Arrays.sort(offsets); + assertArrayEquals(modifiedOffsets, offsets); + + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the persisted representations have all been updated + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + Arrays.sort(offsets); + assertArrayEquals(modifiedOffsets, offsets); + } + } + + /** + * Verify that a slideshow with records that have offsets not matching those of the pictures in the stream still + * correctly pairs the records and pictures. + */ + @Test + void testSlideshowWithIncorrectOffsets() throws IOException { + int[] originalOffsets; + int originalNumberOfRecords; + + // Create a presentation that has records with unmatched offsets, but with matched UIDs. + ByteArrayOutputStream inMemory = new ByteArrayOutputStream(); + try (HSLFSlideShow ppt = HSLFTestDataSamples.getSlideShow("pictures.ppt")) { + originalOffsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + originalNumberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size(); + + Random random = new Random(); + for (HSLFPictureData picture : ppt.getPictureData()) { + // Bound is arbitrary and irrelevant to the test. + picture.bse.setOffset(random.nextInt(500_000)); + } + ppt.write(inMemory); + } + + try (HSLFSlideShow ppt = new HSLFSlideShow(new ByteArrayInputStream(inMemory.toByteArray()))) { + + // Verify that the offsets all got fixed. + int[] offsets = ppt.getPictureData().stream().mapToInt(HSLFPictureData::getOffset).toArray(); + assertArrayEquals(originalOffsets, offsets); + + // Verify that there are the same number of records as in the original slideshow. + int numberOfRecords = ppt.getPictureData().get(0).bStore.getChildRecords().size(); + assertEquals(originalNumberOfRecords, numberOfRecords); + } + } } diff --git a/test-data/slideshow/ppt_with_png_encrypted.ppt b/test-data/slideshow/ppt_with_png_encrypted.ppt new file mode 100644 index 0000000000..4965d8623e Binary files /dev/null and b/test-data/slideshow/ppt_with_png_encrypted.ppt differ diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls index 3d286e5e55..b613a89751 100644 Binary files a/test-data/spreadsheet/stress.xls and b/test-data/spreadsheet/stress.xls differ