Fix 47286 - Word document is saved in the wrong format if the source contains form elements

Correctly translate character indexes to byte indexes when saving the CHPX and PAPX tables

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1150704 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-25 13:58:05 +00:00
parent 4c724bf71c
commit 32dbdf8071
8 changed files with 284 additions and 252 deletions

View File

@ -255,7 +255,7 @@ public final class HWPFDocument extends HWPFDocumentCore
_text = _tpt.getText(); _text = _tpt.getText();
_cbt.rebuild( _cft ); _cbt.rebuild( _cft );
_pbt.rebuild( _text, _dataStream, _cft ); _pbt.rebuild( _text, _cft );
boolean preserve = false; boolean preserve = false;
try try
@ -643,7 +643,7 @@ public final class HWPFDocument extends HWPFDocumentCore
// write out the CHPBinTable. // write out the CHPBinTable.
_fib.setFcPlcfbteChpx(tableOffset); _fib.setFcPlcfbteChpx(tableOffset);
_cbt.writeTo(docSys, fcMin); _cbt.writeTo(docSys, fcMin, _cft.getTextPieceTable());
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.getOffset();
@ -657,7 +657,7 @@ public final class HWPFDocument extends HWPFDocumentCore
// write out the PAPBinTable. // write out the PAPBinTable.
_fib.setFcPlcfbtePapx(tableOffset); _fib.setFcPlcfbtePapx(tableOffset);
_pbt.writeTo(docSys, fcMin, _cft.getTextPieceTable()); _pbt.writeTo(docSys, _cft.getTextPieceTable());
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.getOffset();

View File

@ -37,6 +37,7 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator; import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
@ -46,6 +47,7 @@ import org.apache.poi.util.POILogger;
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */
@Internal
public class CHPBinTable public class CHPBinTable
{ {
private static final POILogger logger = POILogFactory private static final POILogger logger = POILogFactory
@ -54,9 +56,6 @@ public class CHPBinTable
/** List of character properties.*/ /** List of character properties.*/
protected ArrayList<CHPX> _textRuns = new ArrayList<CHPX>(); protected ArrayList<CHPX> _textRuns = new ArrayList<CHPX>();
/** So we can know if things are unicode or not */
private TextPieceTable tpt;
public CHPBinTable() public CHPBinTable()
{ {
} }
@ -78,7 +77,7 @@ public class CHPBinTable
* Constructor used to read a binTable in from a Word document. * Constructor used to read a binTable in from a Word document.
*/ */
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, TextPieceTable tpt ) int size, CharIndexTranslator translator )
{ {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
/* /*
@ -90,7 +89,6 @@ public class CHPBinTable
* further partitions an interval into runs of exception text." * further partitions an interval into runs of exception text."
*/ */
PlexOfCps bte = new PlexOfCps( tableStream, offset, size, 4 ); PlexOfCps bte = new PlexOfCps( tableStream, offset, size, 4 );
this.tpt = tpt;
int length = bte.length(); int length = bte.length();
for (int x = 0; x < length; x++) for (int x = 0; x < length; x++)
@ -101,7 +99,7 @@ public class CHPBinTable
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
pageOffset, tpt); pageOffset, translator);
int fkpSize = cfkp.size(); int fkpSize = cfkp.size();
@ -126,7 +124,8 @@ public class CHPBinTable
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls(); SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs // adding CHPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() ) for ( TextPiece textPiece : complexFileTable.getTextPieceTable()
.getTextPieces() )
{ {
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() ) if ( !prm.isComplex() )
@ -396,7 +395,7 @@ public class CHPBinTable
public void insert(int listIndex, int cpStart, SprmBuffer buf) public void insert(int listIndex, int cpStart, SprmBuffer buf)
{ {
CHPX insertChpx = new CHPX(0, 0, tpt,buf); CHPX insertChpx = new CHPX(0, 0, buf);
// Ensure character offsets are really characters // Ensure character offsets are really characters
insertChpx.setStart(cpStart); insertChpx.setStart(cpStart);
@ -416,7 +415,7 @@ public class CHPBinTable
// Original, until insert at point // Original, until insert at point
// New one // New one
// Clone of original, on to the old end // Clone of original, on to the old end
CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf()); CHPX clone = new CHPX(0, 0, chpx.getSprmBuf());
// Again ensure contains character based offsets no matter what // Again ensure contains character based offsets no matter what
clone.setStart(cpStart); clone.setStart(cpStart);
clone.setEnd(chpx.getEnd()); clone.setEnd(chpx.getEnd());
@ -452,7 +451,7 @@ public class CHPBinTable
return _textRuns; return _textRuns;
} }
public void writeTo(HWPFFileSystem sys, int fcMin) public void writeTo(HWPFFileSystem sys, int fcMin, CharIndexTranslator translator)
throws IOException throws IOException
{ {
@ -482,29 +481,32 @@ public class CHPBinTable
docOffset = docStream.getOffset(); docOffset = docStream.getOffset();
int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE; int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
// get the ending fc // get the ending fc
CHPX lastRun = _textRuns.get(_textRuns.size() - 1); // CHPX lastRun = _textRuns.get(_textRuns.size() - 1);
int endingFc = lastRun.getEnd(); // int endingFc = lastRun.getEnd();
endingFc += fcMin; // endingFc += fcMin;
int endingFc = translator.getByteIndex( _textRuns.get(
_textRuns.size() - 1 ).getEnd() );
ArrayList<CHPX> overflow = _textRuns; ArrayList<CHPX> overflow = _textRuns;
do do
{ {
CHPX startingProp = overflow.get(0); CHPX startingProp = overflow.get(0);
int start = startingProp.getStart() + fcMin; // int start = startingProp.getStart() + fcMin;
int start = translator.getByteIndex( startingProp.getStart() );
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(); CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage();
cfkp.fill(overflow); cfkp.fill(overflow);
byte[] bufFkp = cfkp.toByteArray( tpt ); byte[] bufFkp = cfkp.toByteArray( translator );
docStream.write(bufFkp); docStream.write(bufFkp);
overflow = cfkp.getOverflow(); overflow = cfkp.getOverflow();
int end = endingFc; int end = endingFc;
if (overflow != null) if (overflow != null)
{ {
end = overflow.get(0).getStart() + fcMin; // end = overflow.get(0).getStart() + fcMin;
end = translator.getByteIndex( overflow.get( 0 ).getStart() );
} }
byte[] intHolder = new byte[4]; byte[] intHolder = new byte[4];

View File

@ -21,9 +21,8 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/** /**
* Represents a CHP fkp. The style properties for paragraph and character runs * Represents a CHP fkp. The style properties for paragraph and character runs
@ -41,11 +40,9 @@ import org.apache.poi.util.POILogger;
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */
@Internal
public final class CHPFormattedDiskPage extends FormattedDiskPage public final class CHPFormattedDiskPage extends FormattedDiskPage
{ {
private static final POILogger logger = POILogFactory
.getLogger( CHPFormattedDiskPage.class );
private static final int FC_SIZE = 4; private static final int FC_SIZE = 4;
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>(); private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
@ -76,7 +73,7 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
* read from a Word file). * read from a Word file).
*/ */
public CHPFormattedDiskPage( byte[] documentStream, int offset, public CHPFormattedDiskPage( byte[] documentStream, int offset,
TextPieceTable tpt ) CharIndexTranslator translator )
{ {
super( documentStream, offset ); super( documentStream, offset );
@ -85,8 +82,8 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
int bytesStartAt = getStart( x ); int bytesStartAt = getStart( x );
int bytesEndAt = getEnd( x ); int bytesEndAt = getEnd( x );
int charStartAt = tpt.getCharIndex( bytesStartAt ); int charStartAt = translator.getCharIndex( bytesStartAt );
int charEndAt = tpt.getCharIndex( bytesEndAt, charStartAt ); int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt );
// TODO: CHECK! // TODO: CHECK!
// CHPX chpx = new CHPX( bytesStartAt, bytesEndAt, tpt, getGrpprl( x // CHPX chpx = new CHPX( bytesStartAt, bytesEndAt, tpt, getGrpprl( x
@ -146,75 +143,76 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
return toByteArray( translator ); return toByteArray( translator );
} }
protected byte[] toByteArray(CharIndexTranslator translator) protected byte[] toByteArray( CharIndexTranslator translator )
{ {
byte[] buf = new byte[512]; byte[] buf = new byte[512];
int size = _chpxList.size(); int size = _chpxList.size();
int grpprlOffset = 511; int grpprlOffset = 511;
int offsetOffset = 0; int offsetOffset = 0;
int fcOffset = 0; int fcOffset = 0;
// total size is currently the size of one FC // total size is currently the size of one FC
int totalSize = FC_SIZE + 2; int totalSize = FC_SIZE + 2;
int index = 0; int index = 0;
for (; index < size; index++) for ( ; index < size; index++ )
{
int grpprlLength = (_chpxList.get(index)).getGrpprl().length;
// check to see if we have enough room for an FC, the grpprl offset,
// the grpprl size byte and the grpprl.
totalSize += (FC_SIZE + 2 + grpprlLength);
// if size is uneven we will have to add one so the first grpprl falls
// on a word boundary
if (totalSize > 511 + (index % 2))
{ {
totalSize -= (FC_SIZE + 2 + grpprlLength); int grpprlLength = ( _chpxList.get( index ) ).getGrpprl().length;
break;
// check to see if we have enough room for an FC, the grpprl offset,
// the grpprl size byte and the grpprl.
totalSize += ( FC_SIZE + 2 + grpprlLength );
// if size is uneven we will have to add one so the first grpprl
// falls
// on a word boundary
if ( totalSize > 511 + ( index % 2 ) )
{
totalSize -= ( FC_SIZE + 2 + grpprlLength );
break;
}
// grpprls must fall on word boundaries
if ( ( 1 + grpprlLength ) % 2 > 0 )
{
totalSize += 1;
}
} }
// grpprls must fall on word boundaries // see if we couldn't fit some
if ((1 + grpprlLength) % 2 > 0) if ( index != size )
{ {
totalSize += 1; _overFlow = new ArrayList<CHPX>();
_overFlow.addAll( _chpxList.subList( index, size ) );
} }
}
// see if we couldn't fit some // index should equal number of CHPXs that will be in this fkp now.
if (index != size) buf[511] = (byte) index;
{
_overFlow = new ArrayList<CHPX>();
_overFlow.addAll(_chpxList.subList(index, size));
}
// index should equal number of CHPXs that will be in this fkp now. offsetOffset = ( FC_SIZE * index ) + FC_SIZE;
buf[511] = (byte)index; // grpprlOffset = offsetOffset + index + (grpprlOffset % 2);
offsetOffset = (FC_SIZE * index) + FC_SIZE; CHPX chpx = null;
//grpprlOffset = offsetOffset + index + (grpprlOffset % 2); for ( int x = 0; x < index; x++ )
{
CHPX chpx = null; chpx = _chpxList.get( x );
for (int x = 0; x < index; x++) byte[] grpprl = chpx.getGrpprl();
{
chpx = _chpxList.get(x);
byte[] grpprl = chpx.getGrpprl();
LittleEndian.putInt( buf, fcOffset, LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( chpx.getStart() ) ); translator.getByteIndex( chpx.getStart() ) );
grpprlOffset -= (1 + grpprl.length); grpprlOffset -= ( 1 + grpprl.length );
grpprlOffset -= (grpprlOffset % 2); grpprlOffset -= ( grpprlOffset % 2 );
buf[offsetOffset] = (byte)(grpprlOffset/2); buf[offsetOffset] = (byte) ( grpprlOffset / 2 );
buf[grpprlOffset] = (byte)grpprl.length; buf[grpprlOffset] = (byte) grpprl.length;
System.arraycopy(grpprl, 0, buf, grpprlOffset + 1, grpprl.length); System.arraycopy( grpprl, 0, buf, grpprlOffset + 1, grpprl.length );
offsetOffset += 1; offsetOffset += 1;
fcOffset += FC_SIZE; fcOffset += FC_SIZE;
} }
// put the last chpx's end in // put the last chpx's end in
LittleEndian.putInt( buf, fcOffset, LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( chpx.getEnd() ) ); translator.getByteIndex( chpx.getEnd() ) );
return buf; return buf;
} }
} }

View File

@ -52,7 +52,6 @@ public class PAPBinTable
.getLogger( PAPBinTable.class ); .getLogger( PAPBinTable.class );
protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>(); protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>();
byte[] _dataStream;
public PAPBinTable() public PAPBinTable()
{ {
@ -72,7 +71,8 @@ public class PAPBinTable
} }
public PAPBinTable( byte[] documentStream, byte[] tableStream, public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, TextPieceTable tpt ) byte[] dataStream, int offset, int size,
CharIndexTranslator charIndexTranslator )
{ {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
@ -89,7 +89,8 @@ public class PAPBinTable
* pageNum; * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage( PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
documentStream, dataStream, pageOffset, tpt ); documentStream, dataStream, pageOffset,
charIndexTranslator );
int fkpSize = pfkp.size(); int fkpSize = pfkp.size();
@ -108,7 +109,7 @@ public class PAPBinTable
Integer.valueOf( _paragraphs.size() ), " elements)" ); Integer.valueOf( _paragraphs.size() ), " elements)" );
} }
public void rebuild( final StringBuilder docText, byte[] dataStream, public void rebuild( final StringBuilder docText,
ComplexFileTable complexFileTable ) ComplexFileTable complexFileTable )
{ {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
@ -152,7 +153,7 @@ public class PAPBinTable
newSprmBuffer.append( sprmBuffer.toByteArray() ); newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(), PAPX papx = new PAPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer, dataStream ); textPiece.getEnd(), newSprmBuffer );
_paragraphs.add( papx ); _paragraphs.add( papx );
} }
} }
@ -233,7 +234,7 @@ public class PAPBinTable
") has no PAPX. Creating new one." ); ") has no PAPX. Creating new one." );
// create it manually // create it manually
PAPX papx = new PAPX( startInclusive, endExclusive, PAPX papx = new PAPX( startInclusive, endExclusive,
new SprmBuffer( 2 ), dataStream ); new SprmBuffer( 2 ) );
newPapxs.add( papx ); newPapxs.add( papx );
lastParStart = endExclusive; lastParStart = endExclusive;
@ -272,8 +273,7 @@ public class PAPBinTable
else else
sprmBuffer.append( papx.getGrpprl(), 2 ); sprmBuffer.append( papx.getGrpprl(), 2 );
} }
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer, PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
dataStream );
newPapxs.add( newPapx ); newPapxs.add( newPapx );
lastParStart = endExclusive; lastParStart = endExclusive;
@ -285,14 +285,12 @@ public class PAPBinTable
Long.valueOf( System.currentTimeMillis() - start ), " ms (", Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _paragraphs.size() ), " elements)" ); Integer.valueOf( _paragraphs.size() ), " elements)" );
start = System.currentTimeMillis(); start = System.currentTimeMillis();
_dataStream = dataStream;
} }
public void insert(int listIndex, int cpStart, SprmBuffer buf) public void insert(int listIndex, int cpStart, SprmBuffer buf)
{ {
PAPX forInsert = new PAPX(0, 0, buf, _dataStream); PAPX forInsert = new PAPX(0, 0, buf);
// Ensure character offsets are really characters // Ensure character offsets are really characters
forInsert.setStart(cpStart); forInsert.setStart(cpStart);
@ -322,7 +320,7 @@ public class PAPBinTable
// Original, until insert at point // Original, until insert at point
// New one // New one
// Clone of original, on to the old end // Clone of original, on to the old end
PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream); PAPX clone = new PAPX(0, 0, clonedBuf);
// Again ensure contains character based offsets no matter what // Again ensure contains character based offsets no matter what
clone.setStart(cpStart); clone.setStart(cpStart);
clone.setEnd(currentPap.getEnd()); clone.setEnd(currentPap.getEnd());
@ -399,11 +397,12 @@ public class PAPBinTable
return _paragraphs; return _paragraphs;
} }
public void writeTo( HWPFFileSystem sys, int fcMin, CharIndexTranslator translator ) throws IOException public void writeTo( HWPFFileSystem sys, CharIndexTranslator translator ) throws IOException
{ {
HWPFOutputStream docStream = sys.getStream("WordDocument"); HWPFOutputStream docStream = sys.getStream("WordDocument");
OutputStream tableStream = sys.getStream("1Table"); OutputStream tableStream = sys.getStream("1Table");
HWPFOutputStream dataStream = sys.getStream("1Table");
PlexOfCps binTable = new PlexOfCps(4); PlexOfCps binTable = new PlexOfCps(4);
@ -420,28 +419,32 @@ public class PAPBinTable
docOffset = docStream.getOffset(); docOffset = docStream.getOffset();
int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE; int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
// get the ending fc // get the ending fc
int endingFc = _paragraphs.get(_paragraphs.size() - 1).getEnd(); // int endingFc = _paragraphs.get(_paragraphs.size() - 1).getEnd();
endingFc += fcMin; // endingFc += fcMin;
int endingFc = translator.getByteIndex( _paragraphs.get(
_paragraphs.size() - 1 ).getEnd() );
ArrayList<PAPX> overflow = _paragraphs; ArrayList<PAPX> overflow = _paragraphs;
do do
{ {
PAPX startingProp = overflow.get(0); PAPX startingProp = overflow.get(0);
int start = startingProp.getStart() + fcMin;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream); // int start = startingProp.getStart() + fcMin;
int start = translator.getByteIndex( startingProp.getStart() );
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage();
pfkp.fill(overflow); pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(translator, fcMin); byte[] bufFkp = pfkp.toByteArray(dataStream, translator);
docStream.write(bufFkp); docStream.write(bufFkp);
overflow = pfkp.getOverflow(); overflow = pfkp.getOverflow();
int end = endingFc; int end = endingFc;
if (overflow != null) if (overflow != null)
{ {
end = overflow.get(0).getStart() + fcMin; // end = overflow.get(0).getStart() + fcMin;
end = translator.getByteIndex( overflow.get( 0 ).getStart() );
} }
byte[] intHolder = new byte[4]; byte[] intHolder = new byte[4];

View File

@ -17,11 +17,13 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
@ -48,12 +50,17 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
private ArrayList<PAPX> _papxList = new ArrayList<PAPX>(); private ArrayList<PAPX> _papxList = new ArrayList<PAPX>();
private ArrayList<PAPX> _overFlow; private ArrayList<PAPX> _overFlow;
private byte[] _dataStream;
/**
public PAPFormattedDiskPage(byte[] dataStream) * @deprecated Use {@link #PAPFormattedDiskPage()} instead
*/
public PAPFormattedDiskPage( byte[] dataStream )
{
this();
}
public PAPFormattedDiskPage()
{ {
_dataStream = dataStream;
} }
/** /**
@ -73,20 +80,22 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* Creates a PAPFormattedDiskPage from a 512 byte array * Creates a PAPFormattedDiskPage from a 512 byte array
*/ */
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
int offset, TextPieceTable tpt ) int offset, CharIndexTranslator translator )
{ {
super( documentStream, offset ); super( documentStream, offset );
for ( int x = 0; x < _crun; x++ ) for ( int x = 0; x < _crun; x++ )
{ {
int startAt = getStart( x ); int bytesStartAt = getStart( x );
int endAt = getEnd( x ); int bytesEndAt = getEnd( x );
PAPX papx = new PAPX( startAt, endAt, tpt, getGrpprl( x ), int charStartAt = translator.getCharIndex( bytesStartAt );
int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt );
PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
getParagraphHeight( x ), dataStream ); getParagraphHeight( x ), dataStream );
_papxList.add( papx ); _papxList.add( papx );
} }
_fkp = null; _fkp = null;
_dataStream = dataStream;
} }
/** /**
@ -159,162 +168,180 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* Creates a byte array representation of this data structure. Suitable for * Creates a byte array representation of this data structure. Suitable for
* writing to a Word document. * writing to a Word document.
* *
* @param fcMin The file offset in the main stream where text begins. * @param dataStream required if PAPX is too big to fit in FKP
*
* @return A byte array representing this data structure. * @return A byte array representing this data structure.
* @throws IOException
* if an I/O error occurs.
*/ */
protected byte[] toByteArray(CharIndexTranslator translator, int fcMin) protected byte[] toByteArray( HWPFOutputStream dataStream,
CharIndexTranslator translator ) throws IOException
{ {
byte[] buf = new byte[512]; byte[] buf = new byte[512];
int size = _papxList.size(); int size = _papxList.size();
int grpprlOffset = 0; int grpprlOffset = 0;
int bxOffset = 0; int bxOffset = 0;
int fcOffset = 0; int fcOffset = 0;
byte[] lastGrpprl = new byte[0]; byte[] lastGrpprl = new byte[0];
// total size is currently the size of one FC // total size is currently the size of one FC
int totalSize = FC_SIZE; int totalSize = FC_SIZE;
int index = 0; int index = 0;
for (; index < size; index++) for ( ; index < size; index++ )
{
byte[] grpprl = _papxList.get(index).getGrpprl();
int grpprlLength = grpprl.length;
// is grpprl huge?
if(grpprlLength > 488)
{ {
grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl byte[] grpprl = _papxList.get( index ).getGrpprl();
int grpprlLength = grpprl.length;
// is grpprl huge?
if ( grpprlLength > 488 )
{
grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
}
// check to see if we have enough room for an FC, a BX, and the
// grpprl
// and the 1 byte size of the grpprl.
int addition = 0;
if ( !Arrays.equals( grpprl, lastGrpprl ) )
{
addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
}
else
{
addition = ( FC_SIZE + BX_SIZE );
}
totalSize += addition;
// if size is uneven we will have to add one so the first grpprl
// falls
// on a word boundary
if ( totalSize > 511 + ( index % 2 ) )
{
totalSize -= addition;
break;
}
// grpprls must fall on word boundaries
if ( grpprlLength % 2 > 0 )
{
totalSize += 1;
}
else
{
totalSize += 2;
}
lastGrpprl = grpprl;
} }
// check to see if we have enough room for an FC, a BX, and the grpprl // see if we couldn't fit some
// and the 1 byte size of the grpprl. if ( index != size )
int addition = 0;
if (!Arrays.equals(grpprl, lastGrpprl))
{ {
addition = (FC_SIZE + BX_SIZE + grpprlLength + 1); _overFlow = new ArrayList<PAPX>();
} _overFlow.addAll( _papxList.subList( index, size ) );
else
{
addition = (FC_SIZE + BX_SIZE);
} }
totalSize += addition; // index should equal number of papxs that will be in this fkp now.
buf[511] = (byte) index;
// if size is uneven we will have to add one so the first grpprl falls bxOffset = ( FC_SIZE * index ) + FC_SIZE;
// on a word boundary grpprlOffset = 511;
if (totalSize > 511 + (index % 2))
PAPX papx = null;
lastGrpprl = new byte[0];
for ( int x = 0; x < index; x++ )
{ {
totalSize -= addition; papx = _papxList.get( x );
break; byte[] phe = papx.getParagraphHeight().toByteArray();
} byte[] grpprl = papx.getGrpprl();
// grpprls must fall on word boundaries // is grpprl huge?
if (grpprlLength % 2 > 0) if ( grpprl.length > 488 )
{ {
totalSize += 1; // if so do we have storage at getHugeGrpprlOffset()
} // int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
else // if ( hugeGrpprlOffset == -1 ) // then we have no storage...
{ // {
totalSize += 2; // throw new UnsupportedOperationException(
} // "This Paragraph has no dataStream storage." );
lastGrpprl = grpprl; // }
} // we have some storage...
// see if we couldn't fit some // get the size of the existing storage
if (index != size) // int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
{ // hugeGrpprlOffset );
_overFlow = new ArrayList<PAPX>(); //
_overFlow.addAll(_papxList.subList(index, size)); // if ( maxHugeGrpprlSize < grpprl.length - 2 )
} // { // grpprl.length-2 because we don't store the istd
// throw new UnsupportedOperationException(
// "This Paragraph's dataStream storage is too small." );
// }
// index should equal number of papxs that will be in this fkp now. // store grpprl at hugeGrpprlOffset
buf[511] = (byte)index; // grpprl.length-2 because we don't store the istd
// System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
// 2,
// grpprl.length - 2 );
// LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
// grpprl.length - 2 );
bxOffset = (FC_SIZE * index) + FC_SIZE; byte[] hugePapx = new byte[grpprl.length - 2];
grpprlOffset = 511; System.arraycopy( grpprl, 2, hugePapx, 0, grpprl.length - 2 );
int dataStreamOffset = dataStream.getOffset();
dataStream.write( hugePapx );
PAPX papx = null; // grpprl = grpprl containing only a sprmPHugePapx2
lastGrpprl = new byte[0]; int istd = LittleEndian.getUShort( grpprl, 0 );
for (int x = 0; x < index; x++)
{
papx = _papxList.get(x);
byte[] phe = papx.getParagraphHeight().toByteArray();
byte[] grpprl = papx.getGrpprl();
// is grpprl huge? grpprl = new byte[8];
if(grpprl.length > 488) LittleEndian.putUShort( grpprl, 0, istd );
{ LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
// if so do we have storage at getHugeGrpprlOffset() LittleEndian.putInt( grpprl, 4, dataStreamOffset );
int hugeGrpprlOffset = papx.getHugeGrpprlOffset(); }
if(hugeGrpprlOffset == -1) // then we have no storage...
{
throw new UnsupportedOperationException(
"This Paragraph has no dataStream storage.");
}
// we have some storage...
// get the size of the existing storage boolean same = Arrays.equals( lastGrpprl, grpprl );
int maxHugeGrpprlSize = LittleEndian.getUShort(_dataStream, hugeGrpprlOffset); if ( !same )
{
if (maxHugeGrpprlSize < grpprl.length-2) { // grpprl.length-2 because we don't store the istd grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
throw new UnsupportedOperationException( grpprlOffset -= ( grpprlOffset % 2 );
"This Paragraph's dataStream storage is too small."); }
} // LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
// store grpprl at hugeGrpprlOffset
System.arraycopy(grpprl, 2, _dataStream, hugeGrpprlOffset + 2,
grpprl.length - 2); // grpprl.length-2 because we don't store the istd
LittleEndian.putUShort(_dataStream, hugeGrpprlOffset, grpprl.length - 2);
// grpprl = grpprl containing only a sprmPHugePapx2
int istd = LittleEndian.getUShort(grpprl, 0);
grpprl = new byte[8];
LittleEndian.putUShort(grpprl, 0, istd);
LittleEndian.putUShort(grpprl, 2, 0x6646); // sprmPHugePapx2
LittleEndian.putInt(grpprl, 4, hugeGrpprlOffset);
}
boolean same = Arrays.equals(lastGrpprl, grpprl);
if (!same)
{
grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
grpprlOffset -= (grpprlOffset % 2);
}
// LittleEndian.putInt( buf, fcOffset,
// papx.getStartBytes() );
LittleEndian.putInt( buf, fcOffset, LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getStart() ) ); translator.getByteIndex( papx.getStart() ) );
buf[bxOffset] = (byte)(grpprlOffset/2); buf[bxOffset] = (byte) ( grpprlOffset / 2 );
System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
/*
* refer to the section on PAPX in the spec. Places a size on the
* front of the PAPX. Has to do with how the grpprl stays on word
* boundaries.
*/
if ( !same )
{
int copyOffset = grpprlOffset;
if ( ( grpprl.length % 2 ) > 0 )
{
buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
}
else
{
buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
copyOffset++;
}
System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
lastGrpprl = grpprl;
}
bxOffset += BX_SIZE;
fcOffset += FC_SIZE;
// refer to the section on PAPX in the spec. Places a size on the front
// of the PAPX. Has to do with how the grpprl stays on word
// boundaries.
if (!same)
{
int copyOffset = grpprlOffset;
if ( (grpprl.length % 2) > 0)
{
buf[copyOffset++] = (byte) ( (grpprl.length + 1) / 2);
}
else
{
buf[++copyOffset] = (byte) ( (grpprl.length) / 2);
copyOffset++;
}
System.arraycopy(grpprl, 0, buf, copyOffset, grpprl.length);
lastGrpprl = grpprl;
} }
bxOffset += BX_SIZE;
fcOffset += FC_SIZE;
}
// LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
LittleEndian.putInt( buf, fcOffset, LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getEnd() ) ); translator.getByteIndex( papx.getEnd() ) );
return buf; return buf;
} }
/** /**

View File

@ -22,6 +22,7 @@ import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.hwpf.usermodel.ParagraphProperties; import org.apache.poi.hwpf.usermodel.ParagraphProperties;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/** /**
@ -33,11 +34,11 @@ import org.apache.poi.util.LittleEndian;
* *
* @author Ryan Ackley * @author Ryan Ackley
*/ */
@Internal
@SuppressWarnings( "deprecation" ) @SuppressWarnings( "deprecation" )
public final class PAPX extends BytePropertyNode<PAPX> { public final class PAPX extends BytePropertyNode<PAPX> {
private ParagraphHeight _phe; private ParagraphHeight _phe;
private int _hugeGrpprlOffset = -1;
public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream) public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
{ {
@ -48,6 +49,17 @@ public final class PAPX extends BytePropertyNode<PAPX> {
_buf = buf; _buf = buf;
} }
public PAPX( int charStart, int charEnd, byte[] papx, ParagraphHeight phe,
byte[] dataStream )
{
super( charStart, charEnd, new SprmBuffer( papx, 2 ) );
_phe = phe;
SprmBuffer buf = findHuge( new SprmBuffer( papx, 2 ), dataStream );
if ( buf != null )
_buf = buf;
}
@Deprecated
public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream) public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
{ {
super(fcStart, fcEnd, translator, buf); super(fcStart, fcEnd, translator, buf);
@ -57,13 +69,10 @@ public final class PAPX extends BytePropertyNode<PAPX> {
_buf = buf; _buf = buf;
} }
public PAPX( int charStart, int charEnd, SprmBuffer buf, byte[] dataStream ) public PAPX( int charStart, int charEnd, SprmBuffer buf )
{ {
super( charStart, charEnd, buf ); super( charStart, charEnd, buf );
_phe = new ParagraphHeight(); _phe = new ParagraphHeight();
buf = findHuge( buf, dataStream );
if ( buf != null )
_buf = buf;
} }
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream) private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream)
@ -87,8 +96,6 @@ public final class PAPX extends BytePropertyNode<PAPX> {
// copy Grpprl from dataStream // copy Grpprl from dataStream
System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2, System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2,
grpprlSize); grpprlSize);
// save a pointer to where we got the huge Grpprl from
_hugeGrpprlOffset = hugeGrpprlOffset;
return new SprmBuffer(hugeGrpprl, 2); return new SprmBuffer(hugeGrpprl, 2);
} }
} }
@ -108,11 +115,6 @@ public final class PAPX extends BytePropertyNode<PAPX> {
return ((SprmBuffer)_buf).toByteArray(); return ((SprmBuffer)_buf).toByteArray();
} }
public int getHugeGrpprlOffset()
{
return _hugeGrpprlOffset;
}
public short getIstd() public short getIstd()
{ {
byte[] buf = getGrpprl(); byte[] buf = getGrpprl();

View File

@ -50,7 +50,7 @@ public final class TestCHPBinTable
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
_cHPBinTable.writeTo(fileSys, 0); _cHPBinTable.writeTo(fileSys, 0, fakeTPT);
ByteArrayOutputStream tableOut = fileSys.getStream("1Table"); ByteArrayOutputStream tableOut = fileSys.getStream("1Table");
ByteArrayOutputStream mainOut = fileSys.getStream("WordDocument"); ByteArrayOutputStream mainOut = fileSys.getStream("WordDocument");

View File

@ -53,7 +53,7 @@ public final class TestPAPBinTable extends TestCase
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
_pAPBinTable.writeTo( fileSys, 0, fakeTPT ); _pAPBinTable.writeTo( fileSys, fakeTPT );
ByteArrayOutputStream tableOut = fileSys.getStream( "1Table" ); ByteArrayOutputStream tableOut = fileSys.getStream( "1Table" );
ByteArrayOutputStream mainOut = fileSys.getStream( "WordDocument" ); ByteArrayOutputStream mainOut = fileSys.getStream( "WordDocument" );