From 544d8bae08896f7280772d58046c0e0abdbd7631 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 14 Mar 2014 13:59:24 +0000 Subject: [PATCH] #56260 Partial fix for a slide with a TextHeaderAtom but no other atoms related to it, which is followed by another TextHeaderAtom straight away git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1577537 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hslf/model/Sheet.java | 48 ++++++++++++++++--- .../org/apache/poi/hslf/model/TextRun.java | 10 ++++ .../apache/poi/hslf/usermodel/TestBugs.java | 38 +++++++++++++++ 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java index 46670ff364..875b2dfafa 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java +++ b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java @@ -17,16 +17,34 @@ package org.apache.poi.hslf.model; -import org.apache.poi.ddf.*; -import org.apache.poi.hslf.record.*; -import org.apache.poi.hslf.usermodel.SlideShow; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; - +import java.awt.Graphics2D; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.awt.*; + +import org.apache.poi.ddf.EscherContainerRecord; +import org.apache.poi.ddf.EscherDgRecord; +import org.apache.poi.ddf.EscherDggRecord; +import org.apache.poi.ddf.EscherRecord; +import org.apache.poi.hslf.record.CString; +import org.apache.poi.hslf.record.ColorSchemeAtom; +import org.apache.poi.hslf.record.EscherTextboxWrapper; +import org.apache.poi.hslf.record.OEPlaceholderAtom; +import org.apache.poi.hslf.record.PPDrawing; +import org.apache.poi.hslf.record.Record; +import org.apache.poi.hslf.record.RecordContainer; +import org.apache.poi.hslf.record.RecordTypes; +import org.apache.poi.hslf.record.RoundTripHFPlaceholder12; +import org.apache.poi.hslf.record.SheetContainer; +import 
org.apache.poi.hslf.record.StyleTextProp9Atom; +import org.apache.poi.hslf.record.StyleTextPropAtom; +import org.apache.poi.hslf.record.TextBytesAtom; +import org.apache.poi.hslf.record.TextCharsAtom; +import org.apache.poi.hslf.record.TextHeaderAtom; +import org.apache.poi.hslf.record.TextRulerAtom; +import org.apache.poi.hslf.usermodel.SlideShow; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; /** * This class defines the common format of "Sheets" in a powerpoint @@ -182,6 +200,7 @@ public abstract class Sheet { // Is there a StyleTextPropAtom after the Text Atom? // TODO Do we need to check for text ones two away as well? + // TODO Refactor this to happen later in this for loop if (i < (records.length - 2)) { next = records[i+2]; if (next instanceof StyleTextPropAtom) { @@ -191,6 +210,16 @@ public abstract class Sheet { // See what follows the TextHeaderAtom next = records[i+1]; + + // Is it one we ignore and check the one after that? + if (i < records.length - 2) { + // TODO MasterTextPropAtom + if (next instanceof TextRulerAtom) { + next = records[i+2]; + } + } + + // Is it one we need to record? if (next instanceof TextCharsAtom) { TextCharsAtom tca = (TextCharsAtom)next; trun = new TextRun(tha, tca, stpa); @@ -199,6 +228,11 @@ public abstract class Sheet { trun = new TextRun(tha, tba, stpa); } else if (next instanceof StyleTextPropAtom) { stpa = (StyleTextPropAtom)next; + } else if (next instanceof TextHeaderAtom) { + // Seems to be a mostly, but not completely deleted block of + // text. 
Only the header remains, which isn't useful alone + // Skip on to the next TextHeaderAtom + continue; } else if (next.getRecordType() == (long)RecordTypes.TextSpecInfoAtom.typeID || next.getRecordType() == (long)RecordTypes.BaseTextPropAtom.typeID) { // Safe to ignore these ones diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java b/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java index fdc30f6ea0..c5c876a827 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java +++ b/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java @@ -22,8 +22,10 @@ import java.util.LinkedList; import java.util.List; import org.apache.poi.hslf.model.textproperties.TextPropCollection; +import org.apache.poi.hslf.record.PPDrawing; import org.apache.poi.hslf.record.Record; import org.apache.poi.hslf.record.RecordContainer; +import org.apache.poi.hslf.record.SlideListWithText; import org.apache.poi.hslf.record.StyleTextProp9Atom; import org.apache.poi.hslf.record.StyleTextPropAtom; import org.apache.poi.hslf.record.TextBytesAtom; @@ -656,6 +658,14 @@ public final class TextRun protected void setIndex(int id){ slwtIndex = id; } + + /** + * Whether this Text Run comes from a {@link PPDrawing} (true) + * rather than from the {@link SlideListWithText} (false). 
+ */ + public boolean isDrawingBased() { + return (slwtIndex == -1); + } /** * Returns the array of all hyperlinks in this text run diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java index 50b21a007d..efc196d3f7 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java @@ -51,6 +51,11 @@ import org.apache.poi.hslf.model.TextBox; import org.apache.poi.hslf.model.TextRun; import org.apache.poi.hslf.model.TextShape; import org.apache.poi.hslf.model.TitleMaster; +import org.apache.poi.hslf.record.Document; +import org.apache.poi.hslf.record.Record; +import org.apache.poi.hslf.record.SlideListWithText; +import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet; +import org.apache.poi.hslf.record.TextHeaderAtom; import org.junit.Test; /** @@ -502,4 +507,37 @@ public final class TestBugs { assertTrue("No Exceptions while reading headers", true); } + @Test + public void bug56260() throws Exception { + File file = _slTests.getFile("56260.ppt"); + + HSLFSlideShow ss = new HSLFSlideShow(file.getAbsolutePath()); + SlideShow _show = new SlideShow(ss); + Slide[] _slides = _show.getSlides(); + assertEquals(13, _slides.length); + + // Check the number of TextHeaderAtoms on Slide 1 + Document dr = _show.getDocumentRecord(); + SlideListWithText slidesSLWT = dr.getSlideSlideListWithText(); + SlideAtomsSet s1 = slidesSLWT.getSlideAtomsSets()[0]; + + int tha = 0; + for (Record r : s1.getSlideRecords()) { + if (r instanceof TextHeaderAtom) tha++; + } + assertEquals(2, tha); + + // Check to see that we have a pair next to each other + assertEquals(TextHeaderAtom.class, s1.getSlideRecords()[0].getClass()); + assertEquals(TextHeaderAtom.class, s1.getSlideRecords()[1].getClass()); + + + // Check the number of text runs based on the slide (not textbox) + // Will have skipped the empty 
one + int str = 0; + for (TextRun tr : _slides[0].getTextRuns()) { + if (! tr.isDrawingBased()) str++; + } + assertEquals(1, str); + } }