From e4f6756c932dd178c3164dfc7e4a3f07df344188 Mon Sep 17 00:00:00 2001 From: Marius Volkhart Date: Tue, 9 Mar 2021 20:59:24 +0000 Subject: [PATCH] Parse PPDrawing more deterministically The [MS-PPT] spec is very clear about what the format of the PPDrawing record must be, and parsing deterministically makes for clearer code. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887396 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hslf/record/PPDrawing.java | 141 ++++-------------- 1 file changed, 27 insertions(+), 114 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java index de65b231d2..396fbc06f4 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java @@ -19,8 +19,8 @@ package org.apache.poi.hslf.record; import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -46,11 +46,8 @@ import org.apache.poi.ddf.EscherSpgrRecord; import org.apache.poi.ddf.EscherTextboxRecord; import org.apache.poi.sl.usermodel.ShapeType; import org.apache.poi.util.GenericRecordUtil; -import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; -import static org.apache.logging.log4j.util.Unbox.box; - /** * These are actually wrappers onto Escher drawings. Make use of * the DDF classes to do useful things with them. @@ -58,23 +55,22 @@ import static org.apache.logging.log4j.util.Unbox.box; * PowerPoint (hslf) records found within the EscherTextboxRecord * (msofbtClientTextbox) records. * Also provides easy access to the EscherTextboxRecords, so that their - * text may be extracted and used in Sheets + * text may be extracted and used in Sheets. + *

+ * {@code [MS-PPT] - v20210216} refers to this as a {@code DrawingContainer}. */ // For now, pretending to be an atom. Might not always be, but that // would require a wrapping class public final class PPDrawing extends RecordAtom implements Iterable { - //arbitrarily selected; may need to increase - private static final int MAX_RECORD_LENGTH = 10_485_760; - - - private byte[] _header; + private final byte[] _header; private long _type; - private final List childRecords = new ArrayList<>(); private EscherTextboxWrapper[] textboxWrappers; + private final EscherContainerRecord dgContainer = new EscherContainerRecord(); + //cached EscherDgRecord private EscherDgRecord dg; @@ -82,11 +78,11 @@ public final class PPDrawing extends RecordAtom implements Iterable getEscherRecords() { return childRecords; } + public List getEscherRecords() { return Collections.singletonList(dgContainer); } @Override public Iterator iterator() { - return childRecords.iterator(); + return getEscherRecords().iterator(); } /** @@ -121,26 +117,17 @@ public final class PPDrawing extends RecordAtom implements Iterable textboxes = new ArrayList<>(); - findEscherTextboxRecord(childRecords, textboxes); - this.textboxWrappers = textboxes.toArray(new EscherTextboxWrapper[0]); - } + textboxWrappers = Stream.of(dgContainer). + flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). + flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). + flatMap(PPDrawing::getTextboxHelper). + toArray(EscherTextboxWrapper[]::new); } private static Stream getTextboxHelper(EscherContainerRecord spContainer) { @@ -185,66 +172,6 @@ public final class PPDrawing extends RecordAtom implements Iterable found) { - - int escherBytes = LittleEndian.getInt( source, startPos + 4 ) + 8; - - // Find the record - EscherRecord r = erf.createRecord(source,startPos); - // Fill it in - r.fillFields( source, startPos, erf ); - // Save it - found.add(r); - - // Wind on - int size = r.getRecordSize(); - if(size < 8) { - LOG.atWarn().log("Hit short DDF record at {} - {}", box(startPos),box(size)); - } - - /* - * Sanity check. Always advance the cursor by the correct value. - * - * getRecordSize() must return exactly the same number of bytes that was written in fillFields. - * Sometimes it is not so, see an example in bug #44770. Most likely reason is that one of ddf records calculates wrong size. - */ - if(size != escherBytes){ - LOG.atWarn().log("Record length={} but getRecordSize() returned {}; record: {}", box(escherBytes),box(r.getRecordSize()),r.getClass()); - size = escherBytes; - } - startPos += size; - lenToGo -= size; - if(lenToGo >= 8) { - findEscherChildren(erf, source, startPos, lenToGo, found); - } - } - - /** - * Look for EscherTextboxRecords - */ - private void findEscherTextboxRecord(List toSearch, List found) { - EscherSpRecord sp = null; - for (EscherRecord r : toSearch) { - if (r instanceof EscherSpRecord) { - sp = (EscherSpRecord)r; - } else if (r instanceof EscherTextboxRecord) { - EscherTextboxRecord tbr = (EscherTextboxRecord)r; - EscherTextboxWrapper w = new EscherTextboxWrapper(tbr); - if (sp != null) { - w.setShapeId(sp.getShapeId()); - } - found.add(w); - } else if (r.isContainerRecord()) { - // If it has children, walk them - List children = r.getChildRecords(); - findEscherTextboxRecord(children,found); - } - } - } - /** * We are type 1036 */ @@ -268,9 +195,7 @@ public final class PPDrawing extends RecordAtom implements Iterable firstEscherRecord((EscherContainerRecord)c, EscherRecordTypes.DG)). - ifPresent(c -> dg = (EscherDgRecord)c); - } return dg; } public StyleTextProp9Atom[] getNumberedListInfo() { - EscherContainerRecord dgContainer = getDgContainer(); - - return (dgContainer == null) ? new StyleTextProp9Atom[0] : Stream.of(dgContainer). - flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). - flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). - map(PPDrawing::findInSpContainer). - filter(Optional::isPresent). - map(Optional::get). - toArray(StyleTextProp9Atom[]::new); + return Stream.of(dgContainer). + flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)). + flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)). + map(PPDrawing::findInSpContainer). + filter(Optional::isPresent). + map(Optional::get). + toArray(StyleTextProp9Atom[]::new); } @Override