CHPXs and PAPXs are apparently cp based, but are really byte based! Work around this

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684939 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-11 21:25:17 +00:00
parent 829b2eb084
commit 36821ec463
17 changed files with 321 additions and 83 deletions

View File

@ -219,24 +219,25 @@ public class HWPFDocument extends POIDocument
_dataStream = new byte[0]; _dataStream = new byte[0];
} }
// get the start of text in the main stream // Get the cp of the start of text in the main stream
int fcMin = _fib.getFcMin(); // The latest spec doc says this is always zero!
int fcMin = 0;
//fcMin = _fib.getFcMin()
// load up our standard structures. // Start to load up our standard structures.
_dop = new DocumentProperties(_tableStream, _fib.getFcDop()); _dop = new DocumentProperties(_tableStream, _fib.getFcDop());
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
_tpt = _cft.getTextPieceTable(); _tpt = _cft.getTextPieceTable();
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin);
// Word XP puts in a zero filled buffer in front of the text and it screws // Word XP and later all put in a zero filled buffer in
// up my system for offsets. This is an adjustment. // front of the text. This screws up the system for offsets,
// which assume we always start at zero. This is an adjustment.
int cpMin = _tpt.getCpMin(); int cpMin = _tpt.getCpMin();
if (cpMin > 0)
{ // Now load the rest of the properties, which need to be adjusted
_cbt.adjustForDelete(0, 0, cpMin); // for where text really begin
_pbt.adjustForDelete(0, 0, cpMin); _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), cpMin, _tpt);
} _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt);
// Read FSPA and Escher information // Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());

View File

@ -0,0 +1,59 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.model;
/**
 * Normally PropertyNodes only ever work in characters, but
 *  a few cases actually store bytes, and this lets everything
 *  still work despite that.
 * It handles the conversion as required between bytes
 *  and characters: offsets handed to the constructor are in
 *  bytes, the offsets held by the parent {@link PropertyNode}
 *  are in characters, and {@link #getStartBytes()} /
 *  {@link #getEndBytes()} convert back again.
 * A unicode node is treated as using two bytes per character,
 *  a non-unicode node as using one.
 */
public abstract class BytePropertyNode extends PropertyNode {
    /** Whether the text this node covers is stored as two bytes per character */
    private final boolean isUnicode;

    /**
     * @param fcStart The start of the text for this property, in _bytes_
     * @param fcEnd The end of the text for this property, in _bytes_
     * @param buf The property data, passed through to the parent unchanged
     * @param isUnicode Whether the text covered is unicode (two bytes
     *  per character) or not (one byte per character)
     */
    public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
        super(
                generateCp(fcStart, isUnicode),
                generateCp(fcEnd, isUnicode),
                buf
        );
        // Must be remembered, otherwise the byte getters below would
        //  always behave as if the text were not unicode
        this.isUnicode = isUnicode;
    }

    /**
     * Converts a byte offset into a character offset.
     */
    private static int generateCp(int val, boolean isUnicode) {
        if(isUnicode) {
            return val/2;
        }
        return val;
    }

    /**
     * @return true if the text covered is treated as two bytes per character
     */
    public boolean isUnicode() {
        return isUnicode;
    }

    /**
     * @return the start of the text for this property, in bytes
     */
    public int getStartBytes() {
        if(isUnicode) {
            return getStart()*2;
        }
        return getStart();
    }

    /**
     * @return the end of the text for this property, in bytes
     */
    public int getEndBytes() {
        if(isUnicode) {
            return getEnd()*2;
        }
        return getEnd();
    }
}

View File

@ -37,6 +37,8 @@ public class CHPBinTable
/** List of character properties.*/ /** List of character properties.*/
protected ArrayList _textRuns = new ArrayList(); protected ArrayList _textRuns = new ArrayList();
/** So we can know if things are unicode or not */
private TextPieceTable tpt;
public CHPBinTable() public CHPBinTable()
{ {
@ -52,9 +54,10 @@ public class CHPBinTable
* @param fcMin * @param fcMin
*/ */
public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin) int size, int fcMin, TextPieceTable tpt)
{ {
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
int length = binTable.length(); int length = binTable.length();
for (int x = 0; x < length; x++) for (int x = 0; x < length; x++)
@ -65,7 +68,7 @@ public class CHPBinTable
int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
pageOffset, fcMin); pageOffset, fcMin, tpt);
int fkpSize = cfkp.size(); int fkpSize = cfkp.size();
@ -116,7 +119,14 @@ public class CHPBinTable
public void insert(int listIndex, int cpStart, SprmBuffer buf) public void insert(int listIndex, int cpStart, SprmBuffer buf)
{ {
CHPX insertChpx = new CHPX(cpStart, cpStart, buf); boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
// Ensure character offsets are really characters
insertChpx.setStart(cpStart);
insertChpx.setEnd(cpStart);
if (listIndex == _textRuns.size()) if (listIndex == _textRuns.size())
{ {
_textRuns.add(insertChpx); _textRuns.add(insertChpx);
@ -126,7 +136,16 @@ public class CHPBinTable
CHPX chpx = (CHPX)_textRuns.get(listIndex); CHPX chpx = (CHPX)_textRuns.get(listIndex);
if (chpx.getStart() < cpStart) if (chpx.getStart() < cpStart)
{ {
CHPX clone = new CHPX(cpStart, chpx.getEnd(), chpx.getSprmBuf()); // Copy the properties of the one before to afterwards
// Will go:
// Original, until insert at point
// New one
// Clone of original, on to the old end
CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
// Again ensure contains character based offsets no matter what
clone.setStart(cpStart);
clone.setEnd(chpx.getEnd());
chpx.setEnd(cpStart); chpx.setEnd(cpStart);
_textRuns.add(listIndex + 1, insertChpx); _textRuns.add(listIndex + 1, insertChpx);

View File

@ -55,13 +55,14 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
* This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
* read from a Word file). * read from a Word file).
*/ */
public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
{ {
super(documentStream, offset); super(documentStream, offset);
for (int x = 0; x < _crun; x++) for (int x = 0; x < _crun; x++)
{ {
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x))); boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
} }
} }
@ -157,7 +158,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
chpx = (CHPX)_chpxList.get(x); chpx = (CHPX)_chpxList.get(x);
byte[] grpprl = chpx.getGrpprl(); byte[] grpprl = chpx.getGrpprl();
LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin); LittleEndian.putInt(buf, fcOffset, chpx.getStartBytes() + fcMin);
grpprlOffset -= (1 + grpprl.length); grpprlOffset -= (1 + grpprl.length);
grpprlOffset -= (grpprlOffset % 2); grpprlOffset -= (grpprlOffset % 2);
buf[offsetOffset] = (byte)(grpprlOffset/2); buf[offsetOffset] = (byte)(grpprlOffset/2);
@ -168,7 +169,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
fcOffset += FC_SIZE; fcOffset += FC_SIZE;
} }
// put the last chpx's end in // put the last chpx's end in
LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin); LittleEndian.putInt(buf, fcOffset, chpx.getEndBytes() + fcMin);
return buf; return buf;
} }

View File

@ -25,22 +25,26 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor; import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor;
/** /**
* Comment me * DANGER - works in bytes!
*
* Make sure you call getStart() / getEnd() when you want characters
* (normal use), but getStartBytes() / getEndBytes() when you're
* reading in / writing out!
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */
public class CHPX extends PropertyNode public class CHPX extends BytePropertyNode
{ {
public CHPX(int fcStart, int fcEnd, byte[] grpprl) public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
{ {
super(fcStart, fcEnd, new SprmBuffer(grpprl)); super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
} }
public CHPX(int fcStart, int fcEnd, SprmBuffer buf) public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
{ {
super(fcStart, fcEnd, buf); super(fcStart, fcEnd, buf, isUnicode);
} }

View File

@ -39,14 +39,18 @@ public class PAPBinTable
protected ArrayList _paragraphs = new ArrayList(); protected ArrayList _paragraphs = new ArrayList();
byte[] _dataStream; byte[] _dataStream;
/** So we can know if things are unicode or not */
private TextPieceTable tpt;
public PAPBinTable() public PAPBinTable()
{ {
} }
public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset, public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset,
int size, int fcMin) int size, int fcMin, TextPieceTable tpt)
{ {
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
int length = binTable.length(); int length = binTable.length();
for (int x = 0; x < length; x++) for (int x = 0; x < length; x++)
@ -57,13 +61,14 @@ public class PAPBinTable
int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
dataStream, pageOffset, fcMin); dataStream, pageOffset, fcMin, tpt);
int fkpSize = pfkp.size(); int fkpSize = pfkp.size();
for (int y = 0; y < fkpSize; y++) for (int y = 0; y < fkpSize; y++)
{ {
_paragraphs.add(pfkp.getPAPX(y)); PAPX papx = pfkp.getPAPX(y);
_paragraphs.add(papx);
} }
} }
_dataStream = dataStream; _dataStream = dataStream;
@ -71,7 +76,14 @@ public class PAPBinTable
public void insert(int listIndex, int cpStart, SprmBuffer buf) public void insert(int listIndex, int cpStart, SprmBuffer buf)
{ {
PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream); boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
// Ensure character offsets are really characters
forInsert.setStart(cpStart);
forInsert.setEnd(cpStart);
if (listIndex == _paragraphs.size()) if (listIndex == _paragraphs.size())
{ {
_paragraphs.add(forInsert); _paragraphs.add(forInsert);
@ -90,10 +102,21 @@ public class PAPBinTable
{ {
exc.printStackTrace(); exc.printStackTrace();
} }
// Copy the properties of the one before to afterwards
// Will go:
// Original, until insert at point
// New one
// Clone of original, on to the old end
PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
// Again ensure contains character based offsets no matter what
clone.setStart(cpStart);
clone.setEnd(currentPap.getEnd());
currentPap.setEnd(cpStart); currentPap.setEnd(cpStart);
PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream);
_paragraphs.add(++listIndex, forInsert); _paragraphs.add(listIndex + 1, forInsert);
_paragraphs.add(++listIndex, splitPap); _paragraphs.add(listIndex + 2, clone);
} }
else else
{ {

View File

@ -60,13 +60,14 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
/** /**
* Creates a PAPFormattedDiskPage from a 512 byte array * Creates a PAPFormattedDiskPage from a 512 byte array
*/ */
public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin) public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
{ {
super(documentStream, offset); super(documentStream, offset);
for (int x = 0; x < _crun; x++) for (int x = 0; x < _crun; x++)
{ {
_papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream)); boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
_papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
} }
_fkp = null; _fkp = null;
_dataStream = dataStream; _dataStream = dataStream;
@ -110,7 +111,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
} }
/** /**
* Gets the papx for the paragraph at index in this fkp. * Gets the papx grpprl for the paragraph at index in this fkp.
* *
* @param index The index of the papx to get. * @param index The index of the papx to get.
* @return a papx grpprl. * @return a papx grpprl.
@ -259,7 +260,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2)); grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
grpprlOffset -= (grpprlOffset % 2); grpprlOffset -= (grpprlOffset % 2);
} }
LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin); LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin);
buf[bxOffset] = (byte)(grpprlOffset/2); buf[bxOffset] = (byte)(grpprlOffset/2);
System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
@ -287,7 +288,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
} }
LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin); LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
return buf; return buf;
} }

View File

@ -29,29 +29,32 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.sprm.SprmOperation;
/** /**
* Comment me * DANGER - works in bytes!
*
* Make sure you call getStart() / getEnd() when you want characters
* (normal use), but getStartBytes() / getEndBytes() when you're
* reading in / writing out!
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */
public class PAPX extends PropertyNode public class PAPX extends BytePropertyNode {
{
private ParagraphHeight _phe; private ParagraphHeight _phe;
private int _hugeGrpprlOffset = -1; private int _hugeGrpprlOffset = -1;
public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream) public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
{ {
super(fcStart, fcEnd, new SprmBuffer(papx)); super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
_phe = phe; _phe = phe;
SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream); SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
if(buf != null) if(buf != null)
_buf = buf; _buf = buf;
} }
public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream) public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
{ {
super(fcStart, fcEnd, buf); super(fcStart, fcEnd, buf, isUnicode);
_phe = new ParagraphHeight(); _phe = new ParagraphHeight();
buf = findHuge(buf, dataStream); buf = findHuge(buf, dataStream);
if(buf != null) if(buf != null)

View File

@ -22,7 +22,10 @@ import java.util.Arrays;
/** /**
* Represents a lightweight node in the Trees used to store content * Represents a lightweight node in the Trees used to store content
* properties. Works only in characters. * properties.
* This only ever works in characters. For the few odd cases when
* the start and end aren't in characters (eg PAPX and CHPX), use
* {@link BytePropertyNode} between you and this.
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */

View File

@ -25,6 +25,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@ -62,8 +63,17 @@ public class TextPieceTable
pieces[x] = new PieceDescriptor(node.getBytes(), 0); pieces[x] = new PieceDescriptor(node.getBytes(), 0);
} }
int firstPieceFilePosition = pieces[0].getFilePosition();
_cpMin = firstPieceFilePosition - fcMin; // Figure out the cp of the earliest text piece
// Note that text pieces don't have to be stored in order!
_cpMin = pieces[0].getFilePosition() - fcMin;
for (int x = 0; x < pieces.length; x++) {
int start = pieces[x].getFilePosition() - fcMin;
if(start < _cpMin) {
_cpMin = start;
}
}
// using the PieceDescriptors, build our list of TextPieces. // using the PieceDescriptors, build our list of TextPieces.
for (int x = 0; x < pieces.length; x++) for (int x = 0; x < pieces.length; x++)
@ -105,6 +115,35 @@ public class TextPieceTable
return _textPieces; return _textPieces;
} }
/**
 * Is the text at the given Character offset
 *  unicode, or plain old ascii?
 * In a very evil fashion, you have to actually
 *  know this to make sense of character and
 *  paragraph properties :(
 * If no text piece covers the offset, the answer is
 *  taken from the text piece with the greatest start,
 *  or is false if there are no text pieces at all.
 * @param cp The character offset to check about
 * @return true if the covering (or, failing that, the
 *  latest starting) text piece is unicode
 */
public boolean isUnicodeAt(int cp) {
    boolean lastWas = false;
    // Start below any real offset, so that a piece starting
    //  at zero can still count as the last one
    int lastAt = -1;

    Iterator it = _textPieces.iterator();
    while(it.hasNext()) {
        TextPiece tp = (TextPiece)it.next();
        // If the text piece covers the character, all good
        if(tp.getStart() <= cp && tp.getEnd() >= cp) {
            return tp.isUnicode();
        }
        // Otherwise keep track for the last one. Text pieces
        //  needn't be held in order, so go by the greatest
        //  start seen so far rather than by iteration order
        if(tp.getStart() > lastAt) {
            lastAt = tp.getStart();
            lastWas = tp.isUnicode();
        }
    }

    // If they ask off the end, just go with the last one...
    return lastWas;
}
public byte[] writeTo(HWPFOutputStream docStream) public byte[] writeTo(HWPFOutputStream docStream)
throws IOException throws IOException
{ {

View File

@ -70,10 +70,10 @@ public abstract class FIBAbstractType
private static BitField fFutureSavedUndo = BitFieldFactory.getInstance(0x0008); private static BitField fFutureSavedUndo = BitFieldFactory.getInstance(0x0008);
private static BitField fWord97Saved = BitFieldFactory.getInstance(0x0010); private static BitField fWord97Saved = BitFieldFactory.getInstance(0x0010);
private static BitField fSpare0 = BitFieldFactory.getInstance(0x00FE); private static BitField fSpare0 = BitFieldFactory.getInstance(0x00FE);
protected int field_11_chs; protected int field_11_chs; /** Latest docs say this is Reserved3! */
protected int field_12_chsTables; protected int field_12_chsTables; /** Latest docs say this is Reserved4! */
protected int field_13_fcMin; protected int field_13_fcMin; /** Latest docs say this is Reserved5! */
protected int field_14_fcMac; protected int field_14_fcMac; /** Latest docs say this is Reserved6! */
public FIBAbstractType() public FIBAbstractType()

View File

@ -33,6 +33,8 @@ public class TestCHPBinTable
private CHPBinTable _cHPBinTable = null; private CHPBinTable _cHPBinTable = null;
private HWPFDocFixture _hWPFDocFixture; private HWPFDocFixture _hWPFDocFixture;
private TextPieceTable fakeTPT = new TextPieceTable();
public TestCHPBinTable(String name) public TestCHPBinTable(String name)
{ {
super(name); super(name);
@ -46,7 +48,7 @@ public class TestCHPBinTable
byte[] tableStream = _hWPFDocFixture._tableStream; byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin(); int fcMin = fib.getFcMin();
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin); _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin, fakeTPT);
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
@ -57,7 +59,7 @@ public class TestCHPBinTable
byte[] newTableStream = tableOut.toByteArray(); byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray();
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0); CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0, fakeTPT);
ArrayList oldTextRuns = _cHPBinTable._textRuns; ArrayList oldTextRuns = _cHPBinTable._textRuns;
ArrayList newTextRuns = newBinTable._textRuns; ArrayList newTextRuns = newBinTable._textRuns;

View File

@ -32,6 +32,8 @@ public class TestPAPBinTable
private PAPBinTable _pAPBinTable = null; private PAPBinTable _pAPBinTable = null;
private HWPFDocFixture _hWPFDocFixture; private HWPFDocFixture _hWPFDocFixture;
private TextPieceTable fakeTPT = new TextPieceTable();
public TestPAPBinTable(String name) public TestPAPBinTable(String name)
{ {
super(name); super(name);
@ -45,7 +47,7 @@ public class TestPAPBinTable
byte[] tableStream = _hWPFDocFixture._tableStream; byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin(); int fcMin = fib.getFcMin();
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin); _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT);
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
@ -56,7 +58,7 @@ public class TestPAPBinTable
byte[] newTableStream = tableOut.toByteArray(); byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0); PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs();

View File

@ -18,23 +18,19 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
/** /**
* Test to see if Range.delete() works even if the Range contains a * Test to see if Range.delete() works even if the Range contains a
* CharacterRun that uses Unicode characters. * CharacterRun that uses Unicode characters.
* *
* TODO - re-enable me when unicode paragraph stuff is fixed! * TODO - re-enable me when unicode paragraph stuff is fixed!
*/ */
public abstract class TestRangeDelete extends TestCase { public class TestRangeDelete extends TestCase {
// u201c and u201d are "smart-quotes" // u201c and u201d are "smart-quotes"
private String originalText = private String originalText =

View File

@ -18,23 +18,19 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
/** /**
* Test to see if Range.insertBefore() works even if the Range contains a * Test to see if Range.insertBefore() works even if the Range contains a
* CharacterRun that uses Unicode characters. * CharacterRun that uses Unicode characters.
* *
* TODO - re-enable me when unicode paragraph stuff is fixed! * TODO - re-enable me when unicode paragraph stuff is fixed!
*/ */
public abstract class TestRangeInsertion extends TestCase { public class TestRangeInsertion extends TestCase {
// u201c and u201d are "smart-quotes" // u201c and u201d are "smart-quotes"
private String originalText = private String originalText =

View File

@ -18,8 +18,10 @@ package org.apache.poi.hwpf.usermodel;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PropertyNode;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -30,7 +32,7 @@ import junit.framework.TestCase;
* *
* TODO - re-enable me when unicode paragraph stuff is fixed! * TODO - re-enable me when unicode paragraph stuff is fixed!
*/ */
public abstract class TestRangeProperties extends TestCase { public class TestRangeProperties extends TestCase {
private static final char page_break = (char)12; private static final char page_break = (char)12;
private static final String u_page_1 = private static final String u_page_1 =
@ -143,7 +145,87 @@ public abstract class TestRangeProperties extends TestCase {
assertEquals(32, c7.getFontSize()); assertEquals(32, c7.getFontSize());
} }
/**
 * Checks the raw paragraph property definitions of the
 *  unicode document: how many there are, and that the
 *  character offsets of the first few line up with the
 *  line breaks in the expected text.
 */
public void testUnicodeParagraphDefinitions() throws Exception {
    Range r = u.getRange();
    String[] p1_parts = u_page_1.split("\r");
    String[] p2_parts = u_page_2.split("\r");

    // The main text must be as expected before the offsets mean anything
    assertEquals(u_page_1 + page_break + "\r" + u_page_2, r.text());
    assertEquals(408, r.text().length());

    List pDefs = r._paragraphs;
    assertEquals(35, pDefs.size());

    // Check that the last paragraph ends where it should do
    assertEquals(531, u.getOverallRange().text().length());
    assertEquals(530, u.getCPSplitCalculator().getHeaderTextboxEnd());
    PropertyNode pLast = (PropertyNode)pDefs.get(34);
    // assertEquals(530, pLast.getEnd());

    // Only care about the first few really though
    PropertyNode[] firstFew = new PropertyNode[5];
    for (int i = 0; i < firstFew.length; i++) {
        firstFew[i] = (PropertyNode)pDefs.get(i);
    }

    // 5 paragraphs should get us to the end of our text
    for (int i = 0; i < firstFew.length; i++) {
        assertTrue(firstFew[i].getStart() < 408);
        assertTrue(firstFew[i].getEnd() < 408);
    }

    // Paragraphs should match with lines: each one ends (and the
    //  next one starts) just after the line's terminating \r
    int boundary = 0;
    assertEquals(boundary, firstFew[0].getStart());

    boundary += p1_parts[0].length() + 1;
    assertEquals(boundary, firstFew[0].getEnd());
    assertEquals(boundary, firstFew[1].getStart());

    boundary += p1_parts[1].length() + 1;
    assertEquals(boundary, firstFew[1].getEnd());
    assertEquals(boundary, firstFew[2].getStart());

    boundary += p1_parts[2].length() + 1;
    assertEquals(boundary, firstFew[2].getEnd());
}
/**
* Tests the paragraph text of a unicode document
*/
public void testUnicodeTextParagraphs() throws Exception { public void testUnicodeTextParagraphs() throws Exception {
Range r = u.getRange(); Range r = u.getRange();
assertEquals( assertEquals(
@ -154,14 +236,25 @@ public abstract class TestRangeProperties extends TestCase {
); );
assertEquals( assertEquals(
5, 12,
r.numParagraphs() r.numParagraphs()
); );
String[] p1_parts = u_page_1.split("\r"); String[] p1_parts = u_page_1.split("\r");
String[] p2_parts = u_page_2.split("\r"); String[] p2_parts = u_page_2.split("\r");
System.out.println(r.getParagraph(2).text()); // Check text all matches up properly
// TODO assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text());
assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text());
assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text());
assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text());
assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text());
assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text());
assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text());
assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text());
assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text());
assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text());
assertEquals(page_break + "\r", r.getParagraph(10).text());
assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
} }
public void testUnicodeStyling() throws Exception { public void testUnicodeStyling() throws Exception {
// TODO // TODO

View File

@ -18,23 +18,19 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
/** /**
* Test to see if Range.replaceText() works even if the Range contains a * Test to see if Range.replaceText() works even if the Range contains a
* CharacterRun that uses Unicode characters. * CharacterRun that uses Unicode characters.
* *
* TODO - re-enable me when unicode paragraph stuff is fixed! * TODO - re-enable me when unicode paragraph stuff is fixed!
*/ */
public abstract class TestRangeReplacement extends TestCase { public class TestRangeReplacement extends TestCase {
// u201c and u201d are "smart-quotes" // u201c and u201d are "smart-quotes"
private String originalText = private String originalText =