mirror of https://github.com/apache/poi.git
CHPXs and PAPXs are apparently cp based, but are really byte based! Work around this
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684939 13f79535-47bb-0310-9956-ffa450edef68
parent 829b2eb084 · commit 36821ec463
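The whole change hinges on one distinction: CHPX and PAPX boundaries are stored in the file as byte offsets (FCs), while the PropertyNode tree and the rest of HWPF work in character positions (CPs). Unicode text pieces are UTF-16LE, so there one character is two bytes; 8-bit ("compressed") pieces map one to one. A minimal sketch of that conversion, not part of the commit (the class and method names below are made up; only the arithmetic mirrors the generateCp()/getStartBytes() logic added further down):

    // Illustrative only: byte offset (FC) <-> character position (CP) conversion.
    public final class FcCpSketch {
        static int toCharOffset(int byteOffset, boolean isUnicode) {
            // unicode text is UTF-16LE: two bytes per character
            return isUnicode ? byteOffset / 2 : byteOffset;
        }
        static int toByteOffset(int charOffset, boolean isUnicode) {
            return isUnicode ? charOffset * 2 : charOffset;
        }
        public static void main(String[] args) {
            // a run recorded as starting at byte 100 of unicode text really starts at character 50
            System.out.println(toCharOffset(100, true));  // 50
            System.out.println(toByteOffset(50, true));   // 100
        }
    }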
@@ -219,24 +219,25 @@ public class HWPFDocument extends POIDocument
       _dataStream = new byte[0];
     }

-    // get the start of text in the main stream
-    int fcMin = _fib.getFcMin();
+    // Get the cp of the start of text in the main stream
+    // The latest spec doc says this is always zero!
+    int fcMin = 0;
+    //fcMin = _fib.getFcMin()

-    // load up our standard structures.
+    // Start to load up our standard structures.
     _dop = new DocumentProperties(_tableStream, _fib.getFcDop());
     _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
     _tpt = _cft.getTextPieceTable();
-    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin);
-    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin);

-    // Word XP puts in a zero filled buffer in front of the text and it screws
-    // up my system for offsets. This is an adjustment.
+    // Word XP and later all put in a zero filled buffer in
+    // front of the text. This screws up the system for offsets,
+    // which assume we always start at zero. This is an adjustment.
     int cpMin = _tpt.getCpMin();
-    if (cpMin > 0)
-    {
-      _cbt.adjustForDelete(0, 0, cpMin);
-      _pbt.adjustForDelete(0, 0, cpMin);
-    }
+
+    // Now load the rest of the properties, which need to be adjusted
+    // for where text really begin
+    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), cpMin, _tpt);
+    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt);

     // Read FSPA and Escher information
     _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
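The cpMin handling above relies on the text piece table knowing where the text really starts: Word XP and later can put a zero-filled buffer in front of the real text, so the earliest piece need not begin at character 0. A small sketch of that minimum-start idea, not the committed code (the values are invented; the real version walks PieceDescriptors, as the TextPieceTable hunk later in this diff shows):

    import java.util.Arrays;

    // Illustrative only: cpMin is the smallest character start among the text pieces,
    // which need not be stored in order.
    public class CpMinSketch {
        public static void main(String[] args) {
            int[] pieceStarts = { 768, 512, 1024 };   // made-up piece start positions
            int cpMin = Arrays.stream(pieceStarts).min().getAsInt();
            System.out.println(cpMin);  // 512
        }
    }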
@@ -0,0 +1,59 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.model;
+
+/**
+ * Normally PropertyNodes only ever work in characters, but
+ *  a few cases actually store bytes, and this lets everything
+ *  still work despite that.
+ * It handles the conversion as required between bytes
+ *  and characters.
+ */
+public abstract class BytePropertyNode extends PropertyNode {
+    private boolean isUnicode;
+
+    /**
+     * @param fcStart The start of the text for this property, in _bytes_
+     * @param fcEnd The end of the text for this property, in _bytes_
+     */
+    public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
+        super(
+                generateCp(fcStart, isUnicode),
+                generateCp(fcEnd, isUnicode),
+                buf
+        );
+    }
+    private static int generateCp(int val, boolean isUnicode) {
+        if(isUnicode)
+            return val/2;
+        return val;
+    }
+
+    public boolean isUnicode() {
+        return isUnicode;
+    }
+    public int getStartBytes() {
+        if(isUnicode)
+            return getStart()*2;
+        return getStart();
+    }
+    public int getEndBytes() {
+        if(isUnicode)
+            return getEnd()*2;
+        return getEnd();
+    }
+}
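To make that conversion transparent, the new BytePropertyNode sits between the byte world and PropertyNode: subclasses are constructed with byte offsets, getStart()/getEnd() hand back characters, and getStartBytes()/getEndBytes() convert back for writing out. A self-contained toy version of that contract, for illustration only (this is not the committed class, just the same bookkeeping):

    // Illustrative only: the byte-backed node contract in miniature.
    public class ToyByteNode {
        private final int cpStart;         // stored in characters
        private final int cpEnd;
        private final boolean isUnicode;

        public ToyByteNode(int fcStart, int fcEnd, boolean isUnicode) {
            this.cpStart = isUnicode ? fcStart / 2 : fcStart;
            this.cpEnd = isUnicode ? fcEnd / 2 : fcEnd;
            this.isUnicode = isUnicode;
        }
        public int getStart()      { return cpStart; }                           // characters
        public int getEnd()        { return cpEnd; }                             // characters
        public int getStartBytes() { return isUnicode ? cpStart * 2 : cpStart; } // bytes again
        public int getEndBytes()   { return isUnicode ? cpEnd * 2 : cpEnd; }     // bytes again

        public static void main(String[] args) {
            ToyByteNode n = new ToyByteNode(200, 300, true); // byte range 200..300, unicode
            System.out.println(n.getStart() + ".." + n.getEnd());           // 100..150
            System.out.println(n.getStartBytes() + ".." + n.getEndBytes()); // 200..300
        }
    }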
@@ -37,6 +37,8 @@ public class CHPBinTable
   /** List of character properties.*/
   protected ArrayList _textRuns = new ArrayList();

+  /** So we can know if things are unicode or not */
+  private TextPieceTable tpt;

   public CHPBinTable()
   {
@@ -52,9 +54,10 @@ public class CHPBinTable
   * @param fcMin
   */
  public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset,
-                    int size, int fcMin)
+                    int size, int fcMin, TextPieceTable tpt)
  {
    PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
+    this.tpt = tpt;

    int length = binTable.length();
    for (int x = 0; x < length; x++)
@@ -65,7 +68,7 @@ public class CHPBinTable
      int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;

      CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
-        pageOffset, fcMin);
+        pageOffset, fcMin, tpt);

      int fkpSize = cfkp.size();

@@ -116,7 +119,14 @@ public class CHPBinTable

  public void insert(int listIndex, int cpStart, SprmBuffer buf)
  {
-    CHPX insertChpx = new CHPX(cpStart, cpStart, buf);
+    boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+
+    CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
+
+    // Ensure character offsets are really characters
+    insertChpx.setStart(cpStart);
+    insertChpx.setEnd(cpStart);
+
    if (listIndex == _textRuns.size())
    {
      _textRuns.add(insertChpx);
@@ -126,7 +136,16 @@ public class CHPBinTable
      CHPX chpx = (CHPX)_textRuns.get(listIndex);
      if (chpx.getStart() < cpStart)
      {
-        CHPX clone = new CHPX(cpStart, chpx.getEnd(), chpx.getSprmBuf());
+        // Copy the properties of the one before to afterwards
+        // Will go:
+        // Original, until insert at point
+        // New one
+        // Clone of original, on to the old end
+        CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
+        // Again ensure contains character based offsets no matter what
+        clone.setStart(cpStart);
+        clone.setEnd(chpx.getEnd());
+
        chpx.setEnd(cpStart);

        _textRuns.add(listIndex + 1, insertChpx);
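The reworked insert() above is careful about units: it asks the text piece table whether the insertion point is unicode, builds the new CHPX at (0, 0) so the byte-based constructor cannot misconvert, then forces the real character offsets with setStart()/setEnd(), splitting the covering run into original, new, and clone. A standalone sketch of that three-way split, for illustration only (Run stands in for CHPX; the names and values are made up):

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: the split an insert in the middle of an existing run produces.
    public class RunSplitSketch {
        static class Run {
            int start, end; String props;
            Run(int start, int end, String props) { this.start = start; this.end = end; this.props = props; }
            public String toString() { return props + "[" + start + "," + end + ")"; }
        }

        public static void main(String[] args) {
            List<Run> runs = new ArrayList<Run>();
            runs.add(new Run(0, 100, "old"));

            int cpStart = 40;                       // character position of the insert
            Run existing = runs.get(0);

            Run inserted = new Run(cpStart, cpStart, "new");            // zero length for now
            Run clone = new Run(cpStart, existing.end, existing.props); // keeps the old properties
            existing.end = cpStart;                                     // truncate the original

            runs.add(1, inserted);
            runs.add(2, clone);
            System.out.println(runs);  // [old[0,40), new[40,40), old[40,100)]
        }
    }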
@@ -55,13 +55,14 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
   * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
   * read from a Word file).
   */
-  public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin)
+  public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
  {
    super(documentStream, offset);

    for (int x = 0; x < _crun; x++)
    {
-      _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x)));
+      boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+      _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
    }
  }

@@ -157,7 +158,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
      chpx = (CHPX)_chpxList.get(x);
      byte[] grpprl = chpx.getGrpprl();

-      LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin);
+      LittleEndian.putInt(buf, fcOffset, chpx.getStartBytes() + fcMin);
      grpprlOffset -= (1 + grpprl.length);
      grpprlOffset -= (grpprlOffset % 2);
      buf[offsetOffset] = (byte)(grpprlOffset/2);
@@ -168,7 +169,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
      fcOffset += FC_SIZE;
    }
    // put the last chpx's end in
-    LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin);
+    LittleEndian.putInt(buf, fcOffset, chpx.getEndBytes() + fcMin);
    return buf;
  }

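On the way back out, the FKP writer has to undo the conversion: the FC fields it serialises are byte offsets again, so the new code writes getStartBytes()/getEndBytes() plus fcMin rather than the character values. A tiny sketch of that arithmetic with made-up numbers (illustrative only, not the committed code):

    // Illustrative only: turning a character offset back into the FC that gets written.
    public class FcWriteSketch {
        public static void main(String[] args) {
            int fcMin = 1024;          // byte offset where text starts in the main stream
            int cpStart = 50;          // run start, in characters
            boolean isUnicode = true;  // the run covers UTF-16LE text

            int startBytes = isUnicode ? cpStart * 2 : cpStart;
            int fc = startBytes + fcMin;  // the value LittleEndian.putInt() would write
            System.out.println(fc);       // 1124
        }
    }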
@@ -25,22 +25,26 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
 import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor;

 /**
- * Comment me
+ * DANGER - works in bytes!
+ *
+ * Make sure you call getStart() / getEnd() when you want characters
+ * (normal use), but getStartByte() / getEndByte() when you're
+ * reading in / writing out!
  *
  * @author Ryan Ackley
  */
-public class CHPX extends PropertyNode
+public class CHPX extends BytePropertyNode
 {

-  public CHPX(int fcStart, int fcEnd, byte[] grpprl)
+  public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
   {
-    super(fcStart, fcEnd, new SprmBuffer(grpprl));
+    super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
   }

-  public CHPX(int fcStart, int fcEnd, SprmBuffer buf)
+  public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
   {
-    super(fcStart, fcEnd, buf);
+    super(fcStart, fcEnd, buf, isUnicode);
   }

@@ -39,14 +39,18 @@ public class PAPBinTable
  protected ArrayList _paragraphs = new ArrayList();
  byte[] _dataStream;

+  /** So we can know if things are unicode or not */
+  private TextPieceTable tpt;
+
  public PAPBinTable()
  {
  }

  public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset,
-                    int size, int fcMin)
+                    int size, int fcMin, TextPieceTable tpt)
  {
    PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
+    this.tpt = tpt;

    int length = binTable.length();
    for (int x = 0; x < length; x++)
@@ -57,13 +61,14 @@ public class PAPBinTable
      int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;

      PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
-        dataStream, pageOffset, fcMin);
+        dataStream, pageOffset, fcMin, tpt);

      int fkpSize = pfkp.size();

      for (int y = 0; y < fkpSize; y++)
      {
-        _paragraphs.add(pfkp.getPAPX(y));
+        PAPX papx = pfkp.getPAPX(y);
+        _paragraphs.add(papx);
      }
    }
    _dataStream = dataStream;
@@ -71,7 +76,14 @@ public class PAPBinTable

  public void insert(int listIndex, int cpStart, SprmBuffer buf)
  {
-    PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream);
+    boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+
+    PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
+
+    // Ensure character offsets are really characters
+    forInsert.setStart(cpStart);
+    forInsert.setEnd(cpStart);
+
    if (listIndex == _paragraphs.size())
    {
      _paragraphs.add(forInsert);
@@ -90,10 +102,21 @@ public class PAPBinTable
      {
        exc.printStackTrace();
      }
+
+      // Copy the properties of the one before to afterwards
+      // Will go:
+      // Original, until insert at point
+      // New one
+      // Clone of original, on to the old end
+      PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
+      // Again ensure contains character based offsets no matter what
+      clone.setStart(cpStart);
+      clone.setEnd(currentPap.getEnd());

      currentPap.setEnd(cpStart);
-      PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream);

-      _paragraphs.add(++listIndex, forInsert);
-      _paragraphs.add(++listIndex, splitPap);
+      _paragraphs.add(listIndex + 1, forInsert);
+      _paragraphs.add(listIndex + 2, clone);
    }
    else
    {
@@ -60,13 +60,14 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
  /**
   * Creates a PAPFormattedDiskPage from a 512 byte array
   */
-  public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin)
+  public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
  {
    super(documentStream, offset);

    for (int x = 0; x < _crun; x++)
    {
-      _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream));
+      boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+      _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
    }
    _fkp = null;
    _dataStream = dataStream;
@@ -110,7 +111,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
  }

  /**
-   * Gets the papx for the paragraph at index in this fkp.
+   * Gets the papx grpprl for the paragraph at index in this fkp.
   *
   * @param index The index of the papx to get.
   * @return a papx grpprl.
@@ -259,7 +260,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
      grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
      grpprlOffset -= (grpprlOffset % 2);
    }
-    LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin);
+    LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin);
    buf[bxOffset] = (byte)(grpprlOffset/2);
    System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);

@@ -287,7 +288,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage

    }

-    LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin);
+    LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
    return buf;
  }

@@ -29,29 +29,32 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
 import org.apache.poi.hwpf.sprm.SprmOperation;

 /**
- * Comment me
+ * DANGER - works in bytes!
+ *
+ * Make sure you call getStart() / getEnd() when you want characters
+ * (normal use), but getStartByte() / getEndByte() when you're
+ * reading in / writing out!
  *
  * @author Ryan Ackley
  */
-public class PAPX extends PropertyNode
-{
+public class PAPX extends BytePropertyNode {

  private ParagraphHeight _phe;
  private int _hugeGrpprlOffset = -1;

-  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream)
+  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
  {
-    super(fcStart, fcEnd, new SprmBuffer(papx));
+    super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
    _phe = phe;
    SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
    if(buf != null)
      _buf = buf;
  }

-  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream)
+  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
  {
-    super(fcStart, fcEnd, buf);
+    super(fcStart, fcEnd, buf, isUnicode);
    _phe = new ParagraphHeight();
    buf = findHuge(buf, dataStream);
    if(buf != null)
@@ -22,7 +22,10 @@ import java.util.Arrays;

 /**
  * Represents a lightweight node in the Trees used to store content
- * properties. Works only in characters.
+ * properties.
+ * This only ever works in characters. For the few odd cases when
+ *  the start and end aren't in characters (eg PAPX and CHPX), use
+ *  {@link BytePropertyNode} between you and this.
  *
  * @author Ryan Ackley
  */
@@ -25,6 +25,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;

 /**
@@ -62,8 +63,17 @@ public class TextPieceTable
      pieces[x] = new PieceDescriptor(node.getBytes(), 0);
    }

-    int firstPieceFilePosition = pieces[0].getFilePosition();
-    _cpMin = firstPieceFilePosition - fcMin;
+    // Figure out the cp of the earliest text piece
+    // Note that text pieces don't have to be stored in order!
+    _cpMin = pieces[0].getFilePosition() - fcMin;
+    for (int x = 0; x < pieces.length; x++) {
+      int start = pieces[x].getFilePosition() - fcMin;
+      if(start < _cpMin) {
+        _cpMin = start;
+      }
+    }

    // using the PieceDescriptors, build our list of TextPieces.
    for (int x = 0; x < pieces.length; x++)
@@ -105,6 +115,35 @@ public class TextPieceTable
    return _textPieces;
  }

+  /**
+   * Is the text at the given Character offset
+   *  unicode, or plain old ascii?
+   * In a very evil fashion, you have to actually
+   *  know this to make sense of character and
+   *  paragraph properties :(
+   * @param cp The character offset to check about
+   */
+  public boolean isUnicodeAt(int cp) {
+    boolean lastWas = false;
+    int lastAt = 0;
+
+    Iterator it = _textPieces.iterator();
+    while(it.hasNext()) {
+      TextPiece tp = (TextPiece)it.next();
+      // If the text piece covers the character, all good
+      if(tp.getStart() <= cp && tp.getEnd() >= cp) {
+        return tp.isUnicode();
+      }
+      // Otherwise keep track for the last one
+      if(tp.getStart() > lastAt) {
+        lastWas = tp.isUnicode();
+      }
+    }
+
+    // If they ask off the end, just go with the last one...
+    return lastWas;
+  }
+
  public byte[] writeTo(HWPFOutputStream docStream)
    throws IOException
  {
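isUnicodeAt() is the piece of plumbing everything else leans on: given a character position, find the text piece that covers it and report whether that piece is stored as UTF-16LE, falling back to the last piece seen when asked past the end. A simplified, self-contained sketch of that lookup (Piece stands in for TextPiece; this is not the committed method, just the idea):

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: a simplified isUnicodeAt-style lookup.
    public class UnicodeLookupSketch {
        static class Piece {
            int start, end; boolean unicode;
            Piece(int start, int end, boolean unicode) { this.start = start; this.end = end; this.unicode = unicode; }
        }

        static boolean isUnicodeAt(List<Piece> pieces, int cp) {
            boolean lastWas = false;
            for (Piece p : pieces) {
                if (p.start <= cp && p.end >= cp) {
                    return p.unicode;   // the covering piece decides
                }
                lastWas = p.unicode;    // remember the most recent piece
            }
            return lastWas;             // off the end: go with the last one
        }

        public static void main(String[] args) {
            List<Piece> pieces = new ArrayList<Piece>();
            pieces.add(new Piece(0, 100, false));   // 8-bit text
            pieces.add(new Piece(100, 200, true));  // unicode text
            System.out.println(isUnicodeAt(pieces, 42));   // false
            System.out.println(isUnicodeAt(pieces, 150));  // true
        }
    }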
@@ -70,10 +70,10 @@ public abstract class FIBAbstractType
    private static BitField fFutureSavedUndo = BitFieldFactory.getInstance(0x0008);
    private static BitField fWord97Saved = BitFieldFactory.getInstance(0x0010);
    private static BitField fSpare0 = BitFieldFactory.getInstance(0x00FE);
-    protected int field_11_chs;
-    protected int field_12_chsTables;
-    protected int field_13_fcMin;
-    protected int field_14_fcMac;
+    protected int field_11_chs;        /** Latest docs say this is Reserved3! */
+    protected int field_12_chsTables;  /** Latest docs say this is Reserved4! */
+    protected int field_13_fcMin;      /** Latest docs say this is Reserved5! */
+    protected int field_14_fcMac;      /** Latest docs say this is Reserved6! */


    public FIBAbstractType()
@@ -33,6 +33,8 @@ public class TestCHPBinTable
  private CHPBinTable _cHPBinTable = null;
  private HWPFDocFixture _hWPFDocFixture;

+  private TextPieceTable fakeTPT = new TextPieceTable();
+
  public TestCHPBinTable(String name)
  {
    super(name);
@@ -46,7 +48,7 @@ public class TestCHPBinTable
    byte[] tableStream = _hWPFDocFixture._tableStream;
    int fcMin = fib.getFcMin();

-    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin);
+    _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin, fakeTPT);

    HWPFFileSystem fileSys = new HWPFFileSystem();

@@ -57,7 +59,7 @@ public class TestCHPBinTable
    byte[] newTableStream = tableOut.toByteArray();
    byte[] newMainStream = mainOut.toByteArray();

-    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0);
+    CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0, fakeTPT);

    ArrayList oldTextRuns = _cHPBinTable._textRuns;
    ArrayList newTextRuns = newBinTable._textRuns;
@@ -32,6 +32,8 @@ public class TestPAPBinTable
  private PAPBinTable _pAPBinTable = null;
  private HWPFDocFixture _hWPFDocFixture;

+  private TextPieceTable fakeTPT = new TextPieceTable();
+
  public TestPAPBinTable(String name)
  {
    super(name);
@@ -45,7 +47,7 @@ public class TestPAPBinTable
    byte[] tableStream = _hWPFDocFixture._tableStream;
    int fcMin = fib.getFcMin();

-    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin);
+    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT);

    HWPFFileSystem fileSys = new HWPFFileSystem();

@@ -56,7 +58,7 @@ public class TestPAPBinTable
    byte[] newTableStream = tableOut.toByteArray();
    byte[] newMainStream = mainOut.toByteArray();

-    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0);
+    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT);

    ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
    ArrayList newTextRuns = newBinTable.getParagraphs();
@@ -18,23 +18,19 @@

 package org.apache.poi.hwpf.usermodel;

-import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;

 import junit.framework.TestCase;

+import org.apache.poi.hwpf.HWPFDocument;
+
 /**
  * Test to see if Range.delete() works even if the Range contains a
  * CharacterRun that uses Unicode characters.
  *
  * TODO - re-enable me when unicode paragraph stuff is fixed!
  */
-public abstract class TestRangeDelete extends TestCase {
+public class TestRangeDelete extends TestCase {

    // u201c and u201d are "smart-quotes"
    private String originalText =
@@ -18,23 +18,19 @@

 package org.apache.poi.hwpf.usermodel;

-import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;

 import junit.framework.TestCase;

+import org.apache.poi.hwpf.HWPFDocument;
+
 /**
  * Test to see if Range.insertBefore() works even if the Range contains a
  * CharacterRun that uses Unicode characters.
  *
  * TODO - re-enable me when unicode paragraph stuff is fixed!
  */
-public abstract class TestRangeInsertion extends TestCase {
+public class TestRangeInsertion extends TestCase {

    // u201c and u201d are "smart-quotes"
    private String originalText =
@@ -18,8 +18,10 @@ package org.apache.poi.hwpf.usermodel;

 import java.io.File;
 import java.io.FileInputStream;
+import java.util.List;

 import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.PropertyNode;

 import junit.framework.TestCase;

@@ -30,7 +32,7 @@ import junit.framework.TestCase;
  *
  * TODO - re-enable me when unicode paragraph stuff is fixed!
  */
-public abstract class TestRangeProperties extends TestCase {
+public class TestRangeProperties extends TestCase {
    private static final char page_break = (char)12;

    private static final String u_page_1 =
@@ -143,7 +145,87 @@ public abstract class TestRangeProperties extends TestCase {
        assertEquals(32, c7.getFontSize());
    }

+   /**
+    * Tests the raw definitions of the paragraphs of
+    *  a unicode document
+    */
+   public void testUnicodeParagraphDefinitions() throws Exception {
+       Range r = u.getRange();
+       String[] p1_parts = u_page_1.split("\r");
+       String[] p2_parts = u_page_2.split("\r");
+
+       assertEquals(
+               u_page_1 + page_break + "\r" + u_page_2,
+               r.text()
+       );
+       assertEquals(
+               408, r.text().length()
+       );
+
+       List pDefs = r._paragraphs;
+       assertEquals(35, pDefs.size());
+
+       // Check that the last paragraph ends where it should do
+       assertEquals(531, u.getOverallRange().text().length());
+       assertEquals(530, u.getCPSplitCalculator().getHeaderTextboxEnd());
+       PropertyNode pLast = (PropertyNode)pDefs.get(34);
+       // assertEquals(530, pLast.getEnd());
+
+       // Only care about the first few really though
+       PropertyNode p0 = (PropertyNode)pDefs.get(0);
+       PropertyNode p1 = (PropertyNode)pDefs.get(1);
+       PropertyNode p2 = (PropertyNode)pDefs.get(2);
+       PropertyNode p3 = (PropertyNode)pDefs.get(3);
+       PropertyNode p4 = (PropertyNode)pDefs.get(4);
+
+       // 5 paragraphs should get us to the end of our text
+       assertTrue(p0.getStart() < 408);
+       assertTrue(p0.getEnd() < 408);
+       assertTrue(p1.getStart() < 408);
+       assertTrue(p1.getEnd() < 408);
+       assertTrue(p2.getStart() < 408);
+       assertTrue(p2.getEnd() < 408);
+       assertTrue(p3.getStart() < 408);
+       assertTrue(p3.getEnd() < 408);
+       assertTrue(p4.getStart() < 408);
+       assertTrue(p4.getEnd() < 408);
+
+       // Paragraphs should match with lines
+       assertEquals(
+               0,
+               p0.getStart()
+       );
+       assertEquals(
+               p1_parts[0].length() + 1,
+               p0.getEnd()
+       );
+
+       assertEquals(
+               p1_parts[0].length() + 1,
+               p1.getStart()
+       );
+       assertEquals(
+               p1_parts[0].length() + 1 +
+               p1_parts[1].length() + 1,
+               p1.getEnd()
+       );
+
+       assertEquals(
+               p1_parts[0].length() + 1 +
+               p1_parts[1].length() + 1,
+               p2.getStart()
+       );
+       assertEquals(
+               p1_parts[0].length() + 1 +
+               p1_parts[1].length() + 1 +
+               p1_parts[2].length() + 1,
+               p2.getEnd()
+       );
+   }
+
+   /**
+    * Tests the paragraph text of a unicode document
+    */
    public void testUnicodeTextParagraphs() throws Exception {
        Range r = u.getRange();
        assertEquals(
@@ -154,14 +236,25 @@ public abstract class TestRangeProperties extends TestCase {
        );

        assertEquals(
-           5,
+           12,
            r.numParagraphs()
        );
        String[] p1_parts = u_page_1.split("\r");
        String[] p2_parts = u_page_2.split("\r");

-       System.out.println(r.getParagraph(2).text());
-       // TODO
+       // Check text all matches up properly
+       assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text());
+       assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text());
+       assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text());
+       assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text());
+       assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text());
+       assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text());
+       assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text());
+       assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text());
+       assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text());
+       assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text());
+       assertEquals(page_break + "\r", r.getParagraph(10).text());
+       assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
    }
    public void testUnicodeStyling() throws Exception {
        // TODO
@@ -18,23 +18,19 @@

 package org.apache.poi.hwpf.usermodel;

-import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
-import java.util.List;
-
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.model.PicturesTable;
-import org.apache.poi.hwpf.usermodel.Picture;

 import junit.framework.TestCase;

+import org.apache.poi.hwpf.HWPFDocument;
+
 /**
  * Test to see if Range.replaceText() works even if the Range contains a
  * CharacterRun that uses Unicode characters.
  *
  * TODO - re-enable me when unicode paragraph stuff is fixed!
  */
-public abstract class TestRangeReplacement extends TestCase {
+public class TestRangeReplacement extends TestCase {

    // u201c and u201d are "smart-quotes"
    private String originalText =