More fixes for auto-saved documents

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979286 13f79535-47bb-0310-9956-ffa450edef68
2010-07-26 14:04:27 +00:00 · 2010-07-26 14:04:27 +00:00 · 145e221f87
parent b735d6736a
commit 145e221f87
9 changed files with 111 additions and 73 deletions
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
@ -62,7 +62,12 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
      {
    	int startAt = getStart(x);
 		int endAt = getEnd(x);
-		_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+
+        if (!tpt.isIndexInTable(startAt) && !tpt.isIndexInTable(endAt)) {
+            _chpxList.add(null);
+        } else {
+		    _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+        }
      }
    }

--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
@ -36,12 +36,12 @@ public final class CHPX extends BytePropertyNode

  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
  {
-    super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
+    super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl));
  }

  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
  {
-    super(fcStart, fcEnd, translator ,buf);
+    super(fcStart, translator.lookIndexBackward(fcEnd), translator ,buf);
  }


--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
@ -18,9 +18,9 @@
 package org.apache.poi.hwpf.model;

 public interface CharIndexTranslator {
-
    /**
     * Calculates the char index of the given byte index.
+     * Looks forward to the next index that is in the table if the given index is not.
     *
     * @param bytePos The character offset to check 
     * @return the char index
@ -28,13 +28,28 @@ public interface CharIndexTranslator {
    int getCharIndex(int bytePos);

    /**
-     * Is the text at the given byte offset unicode, or plain old ascii? In a
-     * very evil fashion, you have to actually know this to make sense of
-     * character and paragraph properties :(
+     * Check if index is in table
     *
-     * @param bytePos The character offset to check about
-     * @return true if the text at the given byte offset is unicode
+     * @param bytePos the byte index to check
+     * @return true if the index is in the table, false if it is not
     */
-    boolean isUnicodeAtByteOffset(int bytePos);
+
+    boolean isIndexInTable(int bytePos);
+
+    /**
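+     * Returns the first index &gt;= bytePos that is in the table.
+     *
+     * @param bytePos the byte index to start looking forward from
+     * @return the first index at or after bytePos that is in the table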
+     */
+    public int lookIndexForward(int bytePos);
+
+    /**
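+     * Returns the last index &lt;= bytePos that is in the table.
+     *
+     * @param bytePos the byte index to start looking backward from
+     * @return the last index at or before bytePos that is in the table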
+     */
+    public int lookIndexBackward(int bytePos);

 }
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
@ -196,7 +196,11 @@ public final class PicturesTable
    Range range = _document.getOverallRange();
    for (int i = 0; i < range.numCharacterRuns(); i++) {
    	CharacterRun run = range.getCharacterRun(i);
-    	String text = run.text();
+
+        if (run==null) {
+            continue;
+        }
+
    	Picture picture = extractPicture(run, false);
    	if (picture != null) {
    		pictures.add(picture);
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@ -33,7 +33,7 @@ import java.util.List;
 *
 * @author Ryan Ackley
 */
-public final class TextPieceTable implements CharIndexTranslator {
+public class TextPieceTable implements CharIndexTranslator {
 	protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
    protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
 	// int _multiple;
@ -118,51 +118,6 @@ public final class TextPieceTable implements CharIndexTranslator {
        Collections.sort(_textPiecesFCOrder, new FCComparator());
    }

-	/**
-	 * Is the text at the given Character offset unicode, or plain old ascii? In
-	 * a very evil fashion, you have to actually know this to make sense of
-	 * character and paragraph properties :(
-	 *
-	 * @param cp
-	 *            The character offset to check about
-	 */
-	public boolean isUnicodeAtCharOffset(int cp) {
-		boolean lastWas = false;
-
-		for(TextPiece tp : _textPieces) {
-			// If the text piece covers the character, all good
-			if (tp.getStart() <= cp && tp.getEnd() >= cp) {
-				return tp.isUnicode();
-			}
-			// Otherwise keep track for the last one
-			lastWas = tp.isUnicode();
-		}
-
-		// If they ask off the end, just go with the last one...
-		return lastWas;
-	}
-
-	public boolean isUnicodeAtByteOffset(int bytePos) {
-		boolean lastWas = false;
-
-        for(TextPiece tp : _textPieces) {
-			int curByte = tp.getPieceDescriptor().getFilePosition();
-			int pieceEnd = curByte + tp.bytesLength();
-
-			// If the text piece covers the character, all good
-			if (curByte <= bytePos && pieceEnd > bytePos) {
-				return tp.isUnicode();
-			}
-			// Otherwise keep track for the last one
-			lastWas = tp.isUnicode();
-			// Move along
-			curByte = pieceEnd;
-		}
-
-		// If they ask off the end, just go with the last one...
-		return lastWas;
-	}
-
 	public byte[] writeTo(HWPFOutputStream docStream) throws IOException {

 		PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
@ -245,19 +200,7 @@ public final class TextPieceTable implements CharIndexTranslator {
    public int getCharIndex(int bytePos) {
        int charCount = 0;

-        for(TextPiece tp : _textPiecesFCOrder) {
-			int pieceStart = tp.getPieceDescriptor().getFilePosition();
-
-            if (bytePos > pieceStart + tp.bytesLength()) {
-                continue;
-            }
-
-			if (pieceStart > bytePos) {
-				bytePos = pieceStart;
-			}
-
-            break;
-        }
+        bytePos = lookIndexForward(bytePos);

        for(TextPiece tp : _textPieces) {
            int pieceStart = tp.getPieceDescriptor().getFilePosition();
@ -287,6 +230,62 @@ public final class TextPieceTable implements CharIndexTranslator {
        return charCount;
    }

+    public int lookIndexForward(int bytePos) {
+        for(TextPiece tp : _textPiecesFCOrder) {
+			int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                continue;
+            }
+
+			if (pieceStart > bytePos) {
+				bytePos = pieceStart;
+			}
+
+            break;
+        }
+        return bytePos;
+    }
+
+    public int lookIndexBackward(int bytePos) {
+        int lastEnd = 0;
+
+        for(TextPiece tp : _textPiecesFCOrder) {
+			int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                lastEnd = pieceStart + tp.bytesLength();
+                continue;
+            }
+
+			if (pieceStart > bytePos) {
+				bytePos = lastEnd;
+			}
+
+            break;
+        }
+
+        return bytePos;
+    }
+
+    public boolean isIndexInTable(int bytePos) {
+        for(TextPiece tp : _textPiecesFCOrder) {
+			int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+            if (bytePos > pieceStart + tp.bytesLength()) {
+                continue;
+            }
+
+			if (pieceStart > bytePos) {
+				return false;
+			}
+
+            return true;
+        }
+
+        return false;
+    }
+
    private static class FCComparator implements Comparator<TextPiece> {
        public int compare(TextPiece textPiece, TextPiece textPiece1) {
            if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@ -784,6 +784,10 @@ public class Range { // TODO -instantiable superclass
 	public CharacterRun getCharacterRun(int index) {
 		initCharacterRuns();
 		CHPX chpx = _characters.get(index + _charStart);
+        
+        if (chpx == null) {
+            return null;
+        }

 		int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx
 				.getEnd());
@ -963,7 +967,7 @@ public class Range { // TODO -instantiable superclass
 		int x = min;
 		PropertyNode node = (PropertyNode) rpl.get(x);

-		while (node.getEnd() <= start && x < rpl.size() - 1) {
+		while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) {
 			x++;
 			node = (PropertyNode) rpl.get(x);
 		}
@ -978,7 +982,7 @@ public class Range { // TODO -instantiable superclass

 		int y = x;
 		node = (PropertyNode) rpl.get(y);
-		while (node.getEnd() < end && y < rpl.size() - 1) {
+		while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
 			y++;
 			node = (PropertyNode) rpl.get(y);
 		}
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
@ -31,7 +31,12 @@ public final class TestCHPBinTable
  private CHPBinTable _cHPBinTable = null;
  private HWPFDocFixture _hWPFDocFixture;

-  private TextPieceTable fakeTPT = new TextPieceTable();
+  private TextPieceTable fakeTPT = new TextPieceTable() {
+      @Override
+      public boolean isIndexInTable(int bytePos) {
+          return true;
+      }
+  };

  public void testReadWrite()
    throws Exception
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
@ -157,4 +157,10 @@ public final class TestPictures extends TestCase {
            doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
    }

+    public void testFastSaved2() {
+            HWPFDocument doc = HWPFTestDataSamples.openSampleFile("o_kurs.doc");
+
+            doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+    }
+
 }
--- a/test-data/document/o_kurs.doc
+++ b/test-data/document/o_kurs.doc