diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index ab32cb05a1..71b36a4f46 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -317,6 +317,11 @@ public class HWPFDocument extends POIDocument * document, but excludes any headers and footers. */ public Range getRange() { + // First up, trigger a full-recalculate + // Needed in case of deletes etc + getOverallRange(); + + // Now, return the real one return new Range( _cpSplit.getMainDocumentStart(), _cpSplit.getMainDocumentEnd(), diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 20f9b63b98..fefcf442be 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -64,10 +64,12 @@ public class PAPFormattedDiskPage extends FormattedDiskPage { super(documentStream, offset); - for (int x = 0; x < _crun; x++) - { - boolean isUnicode = tpt.isUnicodeAt( getStart(x) ); - _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); + for (int x = 0; x < _crun; x++) { + int startAt = getStart(x) - fcMin; + int endAt = getEnd(x) - fcMin; + boolean isUnicode = tpt.isUnicodeAt(startAt); + + _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); } _fkp = null; _dataStream = dataStream; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java index 5ae16aa5e3..f369c169dc 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java @@ -85,18 +85,18 @@ public abstract class PropertyNode implements Comparable, Cloneable { int end = start + length; - if (_cpEnd > start) - { - if (_cpStart < end) - { - _cpEnd = end >= _cpEnd ? start : _cpEnd - length; - _cpStart = Math.min(start, _cpStart); - } - else - { - _cpEnd -= length; - _cpStart -= length; - } + if (_cpEnd > start) { + // The start of the change is before we end + + if (_cpStart < end) { + // The delete was somewhere in the middle of us + _cpEnd = end >= _cpEnd ? start : _cpEnd - length; + _cpStart = Math.min(start, _cpStart); + } else { + // The delete was before us + _cpEnd -= length; + _cpStart -= length; + } } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java index c66f8aa349..92ec6cfbb3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java @@ -20,11 +20,14 @@ package org.apache.poi.hwpf.model; -import org.apache.poi.hwpf.sprm.SprmBuffer; -import org.apache.poi.hwpf.sprm.SectionSprmUncompressor; import org.apache.poi.hwpf.sprm.SectionSprmCompressor; +import org.apache.poi.hwpf.sprm.SectionSprmUncompressor; import org.apache.poi.hwpf.usermodel.SectionProperties; +/** + * TODO - figure out if this works in characters, like most + * things do, or in bytes as PAPX / CHPX does. + */ public class SEPX extends PropertyNode { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index 351c06ae74..bf2cf3014a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -118,6 +118,9 @@ public class TextPiece extends PropertyNode implements Comparable if(end > buf.length()) { throw new StringIndexOutOfBoundsException("Index " + end + " out of range 0 -> " + buf.length()); } + if(end < start) { + throw new StringIndexOutOfBoundsException("Asked for text from " + start + " to " + end + ", which has an end before the start!"); + } return buf.substring(start, end); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 109fd175b4..6e230a5dd1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -650,14 +650,6 @@ public class Range absPlaceHolderIndex, (absPlaceHolderIndex + pPlaceHolder.length()), getDocument() ); - if (subRange.usesUnicode()) { - absPlaceHolderIndex = getStartOffset() + (pOffset * 2); - subRange = new Range( - absPlaceHolderIndex, - (absPlaceHolderIndex + (pPlaceHolder.length() * 2)), - getDocument() - ); - } // this Range isn't a proper parent of the subRange() so we'll have to keep // track of an updated endOffset on our own @@ -674,12 +666,6 @@ public class Range (absPlaceHolderIndex + pPlaceHolder.length() + pValue.length()), getDocument() ); - if (subRange.usesUnicode()) - subRange = new Range( - (absPlaceHolderIndex + (pValue.length() * 2)), - (absPlaceHolderIndex + (pPlaceHolder.length() * 2) + - (pValue.length() * 2)), getDocument() - ); // deletes are automagically propagated subRange.delete(); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java index 7cbd75d6b8..4c7d7b92e8 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -27,12 +27,14 @@ import org.apache.poi.hwpf.HWPFDocument; /** * Test to see if Range.delete() works even if the Range contains a * CharacterRun that uses Unicode characters. - * - * TODO - re-enable me when unicode paragraph stuff is fixed! */ public class TestRangeDelete extends TestCase { // u201c and u201d are "smart-quotes" + private String introText = + "Introduction\r"; + private String fillerText = + "${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r"; private String originalText = "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r"; private String searchText = "${delete}"; @@ -64,31 +66,34 @@ public class TestRangeDelete extends TestCase { public void testDocStructure() throws Exception { HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + Range range; + Section section; + Paragraph para; - Range range = daDoc.getOverallRange(); - + // First, check overall + range = daDoc.getOverallRange(); assertEquals(1, range.numSections()); - Section section = range.getSection(0); - - assertEquals(5, section.numParagraphs()); - Paragraph para = section.getParagraph(2); - - assertEquals(5, para.numCharacterRuns()); - - assertEquals(originalText, para.text()); + assertEquals(4, range.numParagraphs()); - // Now check on just the main text + // Now, onto just the doc bit range = daDoc.getRange(); - + assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); + + para = section.getParagraph(0); + assertEquals(1, para.numCharacterRuns()); + assertEquals(introText, para.text()); + + para = section.getParagraph(1); + assertEquals(2, para.numCharacterRuns()); + assertEquals(fillerText, para.text()); + para = section.getParagraph(2); - - assertEquals(5, para.numCharacterRuns()); - + assertEquals(6, para.numCharacterRuns()); assertEquals(originalText, para.text()); } @@ -103,7 +108,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -114,12 +119,7 @@ public class TestRangeDelete extends TestCase { assertEquals(192, offset); int absOffset = para.getStartOffset() + offset; - if (para.usesUnicode()) - absOffset = para.getStartOffset() + (offset * 2); - Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument()); - if (subRange.usesUnicode()) - subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument()); assertEquals(searchText, subRange.text()); @@ -131,7 +131,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); para = section.getParagraph(2); text = para.text(); @@ -154,7 +154,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -163,26 +163,23 @@ public class TestRangeDelete extends TestCase { boolean keepLooking = true; while (keepLooking) { - + // Reload the range every time + range = daDoc.getRange(); int offset = range.text().indexOf(searchText); if (offset >= 0) { int absOffset = range.getStartOffset() + offset; - if (range.usesUnicode()) - absOffset = range.getStartOffset() + (offset * 2); Range subRange = new Range( absOffset, (absOffset + searchText.length()), range.getDocument()); - if (subRange.usesUnicode()) - subRange = new Range( - absOffset, (absOffset + (searchText.length() * 2)), range.getDocument()); assertEquals(searchText, subRange.text()); subRange.delete(); - } else + } else { keepLooking = false; + } } // we need to let the model re-calculate the Range before we use it @@ -191,7 +188,11 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); + + para = section.getParagraph(0); + text = para.text(); + assertEquals(introText, text); para = section.getParagraph(1); text = para.text(); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java index b4d7470387..69be319cf2 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java @@ -36,7 +36,7 @@ public class TestRangeInsertion extends TestCase { private String originalText = "It is used to confirm that text insertion works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.\r"; private String textToInsert = "Look at me! I'm cool! "; - private int insertionPoint = 244; + private int insertionPoint = 122; private String illustrativeDocFile; @@ -69,12 +69,21 @@ public class TestRangeInsertion extends TestCase { assertEquals(3, section.numParagraphs()); Paragraph para = section.getParagraph(2); + assertEquals(originalText, para.text()); - assertEquals(3, para.numCharacterRuns()); - String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + - para.getCharacterRun(2).text(); + assertEquals(6, para.numCharacterRuns()); + String text = + para.getCharacterRun(0).text() + + para.getCharacterRun(1).text() + + para.getCharacterRun(2).text() + + para.getCharacterRun(3).text() + + para.getCharacterRun(4).text() + + para.getCharacterRun(5).text() + ; assertEquals(originalText, text); + + assertEquals(insertionPoint, para.getStartOffset()); } /** @@ -105,10 +114,17 @@ public class TestRangeInsertion extends TestCase { assertEquals(3, section.numParagraphs()); Paragraph para = section.getParagraph(2); + assertEquals((textToInsert + originalText), para.text()); - assertEquals(3, para.numCharacterRuns()); - String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + - para.getCharacterRun(2).text(); + assertEquals(6, para.numCharacterRuns()); + String text = + para.getCharacterRun(0).text() + + para.getCharacterRun(1).text() + + para.getCharacterRun(2).text() + + para.getCharacterRun(3).text() + + para.getCharacterRun(4).text() + + para.getCharacterRun(5).text() + ; // System.out.println(text); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java index f8a251b693..59754fc21e 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java @@ -257,6 +257,91 @@ public class TestRangeProperties extends TestCase { assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text()); } public void testUnicodeStyling() throws Exception { - // TODO + Range r = u.getRange(); + String[] p1_parts = u_page_1.split("\r"); + + Paragraph p1 = r.getParagraph(0); + Paragraph p7 = r.getParagraph(6); + + // Line ending in its own run each time! + assertEquals(2, p1.numCharacterRuns()); + assertEquals(2, p7.numCharacterRuns()); + + CharacterRun c1a = p1.getCharacterRun(0); + CharacterRun c1b = p1.getCharacterRun(1); + CharacterRun c7a = p7.getCharacterRun(0); + CharacterRun c7b = p7.getCharacterRun(1); + + assertEquals("Times New Roman", c1a.getFontName()); // No Calibri + assertEquals(22, c1a.getFontSize()); + + assertEquals("Times New Roman", c1b.getFontName()); // No Calibri + assertEquals(22, c1b.getFontSize()); + + assertEquals("Times New Roman", c7a.getFontName()); + assertEquals(48, c7a.getFontSize()); + + assertEquals("Times New Roman", c7b.getFontName()); + assertEquals(48, c7b.getFontSize()); + + // Now check where they crop up + assertEquals( + 0, + c1a.getStartOffset() + ); + assertEquals( + p1_parts[0].length(), + c1a.getEndOffset() + ); + + assertEquals( + p1_parts[0].length(), + c1b.getStartOffset() + ); + assertEquals( + p1_parts[0].length()+1, + c1b.getEndOffset() + ); + + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1 + + p1_parts[2].length() + 1 + + p1_parts[3].length() + 1 + + p1_parts[4].length() + 1 + + p1_parts[5].length() + 1, + c7a.getStartOffset() + ); + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1 + + p1_parts[2].length() + 1 + + p1_parts[3].length() + 1 + + p1_parts[4].length() + 1 + + p1_parts[5].length() + 1 + + 1, + c7a.getEndOffset() + ); + + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1 + + p1_parts[2].length() + 1 + + p1_parts[3].length() + 1 + + p1_parts[4].length() + 1 + + p1_parts[5].length() + 1 + + 1, + c7b.getStartOffset() + ); + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1 + + p1_parts[2].length() + 1 + + p1_parts[3].length() + 1 + + p1_parts[4].length() + 1 + + p1_parts[5].length() + 1 + + p1_parts[6].length() + 1, + c7b.getEndOffset() + ); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java index a342fdfd7b..05dec843a1 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java @@ -70,12 +70,18 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); Paragraph para = section.getParagraph(2); - assertEquals(5, para.numCharacterRuns()); - String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + - para.getCharacterRun(2).text() + para.getCharacterRun(3).text() + para.getCharacterRun(4).text(); + assertEquals(6, para.numCharacterRuns()); + String text = + para.getCharacterRun(0).text() + + para.getCharacterRun(1).text() + + para.getCharacterRun(2).text() + + para.getCharacterRun(3).text() + + para.getCharacterRun(4).text() + + para.getCharacterRun(5).text() + ; assertEquals(originalText, text); } @@ -91,7 +97,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -124,7 +130,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -135,7 +141,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); para = section.getParagraph(2); text = para.text();