fix 47286 - Word documents are saved in the wrong format if the source contains form elements

correctly translate character offsets to byte offsets when saving the CHPX and PAPX tables

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1150704 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-25 13:58:05 +00:00
parent 4c724bf71c
commit 32dbdf8071
8 changed files with 284 additions and 252 deletions

View File

@ -255,7 +255,7 @@ public final class HWPFDocument extends HWPFDocumentCore
_text = _tpt.getText();
_cbt.rebuild( _cft );
_pbt.rebuild( _text, _dataStream, _cft );
_pbt.rebuild( _text, _cft );
boolean preserve = false;
try
@ -643,7 +643,7 @@ public final class HWPFDocument extends HWPFDocumentCore
// write out the CHPBinTable.
_fib.setFcPlcfbteChpx(tableOffset);
_cbt.writeTo(docSys, fcMin);
_cbt.writeTo(docSys, fcMin, _cft.getTextPieceTable());
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
@ -657,7 +657,7 @@ public final class HWPFDocument extends HWPFDocumentCore
// write out the PAPBinTable.
_fib.setFcPlcfbtePapx(tableOffset);
_pbt.writeTo(docSys, fcMin, _cft.getTextPieceTable());
_pbt.writeTo(docSys, _cft.getTextPieceTable());
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();

View File

@ -37,6 +37,7 @@ import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
@ -46,6 +47,7 @@ import org.apache.poi.util.POILogger;
*
* @author Ryan Ackley
*/
@Internal
public class CHPBinTable
{
private static final POILogger logger = POILogFactory
@ -54,9 +56,6 @@ public class CHPBinTable
/** List of character properties.*/
protected ArrayList<CHPX> _textRuns = new ArrayList<CHPX>();
/** So we can know if things are unicode or not */
private TextPieceTable tpt;
public CHPBinTable()
{
}
@ -78,7 +77,7 @@ public class CHPBinTable
* Constructor used to read a binTable in from a Word document.
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, TextPieceTable tpt )
int size, CharIndexTranslator translator )
{
long start = System.currentTimeMillis();
/*
@ -90,7 +89,6 @@ public class CHPBinTable
* further partitions an interval into runs of exception text."
*/
PlexOfCps bte = new PlexOfCps( tableStream, offset, size, 4 );
this.tpt = tpt;
int length = bte.length();
for (int x = 0; x < length; x++)
@ -101,7 +99,7 @@ public class CHPBinTable
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
pageOffset, tpt);
pageOffset, translator);
int fkpSize = cfkp.size();
@ -126,7 +124,8 @@ public class CHPBinTable
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
for ( TextPiece textPiece : complexFileTable.getTextPieceTable()
.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
@ -396,7 +395,7 @@ public class CHPBinTable
public void insert(int listIndex, int cpStart, SprmBuffer buf)
{
CHPX insertChpx = new CHPX(0, 0, tpt,buf);
CHPX insertChpx = new CHPX(0, 0, buf);
// Ensure character offsets are really characters
insertChpx.setStart(cpStart);
@ -416,7 +415,7 @@ public class CHPBinTable
// Original, until insert at point
// New one
// Clone of original, on to the old end
CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
CHPX clone = new CHPX(0, 0, chpx.getSprmBuf());
// Again ensure contains character based offsets no matter what
clone.setStart(cpStart);
clone.setEnd(chpx.getEnd());
@ -452,7 +451,7 @@ public class CHPBinTable
return _textRuns;
}
public void writeTo(HWPFFileSystem sys, int fcMin)
public void writeTo(HWPFFileSystem sys, int fcMin, CharIndexTranslator translator)
throws IOException
{
@ -483,28 +482,31 @@ public class CHPBinTable
int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
// get the ending fc
CHPX lastRun = _textRuns.get(_textRuns.size() - 1);
int endingFc = lastRun.getEnd();
endingFc += fcMin;
// CHPX lastRun = _textRuns.get(_textRuns.size() - 1);
// int endingFc = lastRun.getEnd();
// endingFc += fcMin;
int endingFc = translator.getByteIndex( _textRuns.get(
_textRuns.size() - 1 ).getEnd() );
ArrayList<CHPX> overflow = _textRuns;
do
{
CHPX startingProp = overflow.get(0);
int start = startingProp.getStart() + fcMin;
// int start = startingProp.getStart() + fcMin;
int start = translator.getByteIndex( startingProp.getStart() );
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage();
cfkp.fill(overflow);
byte[] bufFkp = cfkp.toByteArray( tpt );
byte[] bufFkp = cfkp.toByteArray( translator );
docStream.write(bufFkp);
overflow = cfkp.getOverflow();
int end = endingFc;
if (overflow != null)
{
end = overflow.get(0).getStart() + fcMin;
// end = overflow.get(0).getStart() + fcMin;
end = translator.getByteIndex( overflow.get( 0 ).getStart() );
}
byte[] intHolder = new byte[4];

View File

@ -21,9 +21,8 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* Represents a CHP fkp. The style properties for paragraph and character runs
@ -41,11 +40,9 @@ import org.apache.poi.util.POILogger;
*
* @author Ryan Ackley
*/
@Internal
public final class CHPFormattedDiskPage extends FormattedDiskPage
{
private static final POILogger logger = POILogFactory
.getLogger( CHPFormattedDiskPage.class );
private static final int FC_SIZE = 4;
private ArrayList<CHPX> _chpxList = new ArrayList<CHPX>();
@ -76,7 +73,7 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
* read from a Word file).
*/
public CHPFormattedDiskPage( byte[] documentStream, int offset,
TextPieceTable tpt )
CharIndexTranslator translator )
{
super( documentStream, offset );
@ -85,8 +82,8 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
int bytesStartAt = getStart( x );
int bytesEndAt = getEnd( x );
int charStartAt = tpt.getCharIndex( bytesStartAt );
int charEndAt = tpt.getCharIndex( bytesEndAt, charStartAt );
int charStartAt = translator.getCharIndex( bytesStartAt );
int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt );
// TODO: CHECK!
// CHPX chpx = new CHPX( bytesStartAt, bytesEndAt, tpt, getGrpprl( x
@ -165,7 +162,8 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
// check to see if we have enough room for an FC, the grpprl offset,
// the grpprl size byte and the grpprl.
totalSize += ( FC_SIZE + 2 + grpprlLength );
// if size is uneven we will have to add one so the first grpprl falls
// if size is uneven we will have to add one so the first grpprl
// falls
// on a word boundary
if ( totalSize > 511 + ( index % 2 ) )
{

View File

@ -52,7 +52,6 @@ public class PAPBinTable
.getLogger( PAPBinTable.class );
protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>();
byte[] _dataStream;
public PAPBinTable()
{
@ -72,7 +71,8 @@ public class PAPBinTable
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, TextPieceTable tpt )
byte[] dataStream, int offset, int size,
CharIndexTranslator charIndexTranslator )
{
long start = System.currentTimeMillis();
@ -89,7 +89,8 @@ public class PAPBinTable
* pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
documentStream, dataStream, pageOffset, tpt );
documentStream, dataStream, pageOffset,
charIndexTranslator );
int fkpSize = pfkp.size();
@ -108,7 +109,7 @@ public class PAPBinTable
Integer.valueOf( _paragraphs.size() ), " elements)" );
}
public void rebuild( final StringBuilder docText, byte[] dataStream,
public void rebuild( final StringBuilder docText,
ComplexFileTable complexFileTable )
{
long start = System.currentTimeMillis();
@ -152,7 +153,7 @@ public class PAPBinTable
newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer, dataStream );
textPiece.getEnd(), newSprmBuffer );
_paragraphs.add( papx );
}
}
@ -233,7 +234,7 @@ public class PAPBinTable
") has no PAPX. Creating new one." );
// create it manually
PAPX papx = new PAPX( startInclusive, endExclusive,
new SprmBuffer( 2 ), dataStream );
new SprmBuffer( 2 ) );
newPapxs.add( papx );
lastParStart = endExclusive;
@ -272,8 +273,7 @@ public class PAPBinTable
else
sprmBuffer.append( papx.getGrpprl(), 2 );
}
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
dataStream );
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
newPapxs.add( newPapx );
lastParStart = endExclusive;
@ -285,14 +285,12 @@ public class PAPBinTable
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _paragraphs.size() ), " elements)" );
start = System.currentTimeMillis();
_dataStream = dataStream;
}
public void insert(int listIndex, int cpStart, SprmBuffer buf)
{
PAPX forInsert = new PAPX(0, 0, buf, _dataStream);
PAPX forInsert = new PAPX(0, 0, buf);
// Ensure character offsets are really characters
forInsert.setStart(cpStart);
@ -322,7 +320,7 @@ public class PAPBinTable
// Original, until insert at point
// New one
// Clone of original, on to the old end
PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream);
PAPX clone = new PAPX(0, 0, clonedBuf);
// Again ensure contains character based offsets no matter what
clone.setStart(cpStart);
clone.setEnd(currentPap.getEnd());
@ -399,11 +397,12 @@ public class PAPBinTable
return _paragraphs;
}
public void writeTo( HWPFFileSystem sys, int fcMin, CharIndexTranslator translator ) throws IOException
public void writeTo( HWPFFileSystem sys, CharIndexTranslator translator ) throws IOException
{
HWPFOutputStream docStream = sys.getStream("WordDocument");
OutputStream tableStream = sys.getStream("1Table");
HWPFOutputStream dataStream = sys.getStream("1Table");
PlexOfCps binTable = new PlexOfCps(4);
@ -421,27 +420,31 @@ public class PAPBinTable
int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
// get the ending fc
int endingFc = _paragraphs.get(_paragraphs.size() - 1).getEnd();
endingFc += fcMin;
// int endingFc = _paragraphs.get(_paragraphs.size() - 1).getEnd();
// endingFc += fcMin;
int endingFc = translator.getByteIndex( _paragraphs.get(
_paragraphs.size() - 1 ).getEnd() );
ArrayList<PAPX> overflow = _paragraphs;
do
{
PAPX startingProp = overflow.get(0);
int start = startingProp.getStart() + fcMin;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
// int start = startingProp.getStart() + fcMin;
int start = translator.getByteIndex( startingProp.getStart() );
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage();
pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(translator, fcMin);
byte[] bufFkp = pfkp.toByteArray(dataStream, translator);
docStream.write(bufFkp);
overflow = pfkp.getOverflow();
int end = endingFc;
if (overflow != null)
{
end = overflow.get(0).getStart() + fcMin;
// end = overflow.get(0).getStart() + fcMin;
end = translator.getByteIndex( overflow.get( 0 ).getStart() );
}
byte[] intHolder = new byte[4];

View File

@ -17,11 +17,13 @@
package org.apache.poi.hwpf.model;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
@ -48,12 +50,17 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
private ArrayList<PAPX> _papxList = new ArrayList<PAPX>();
private ArrayList<PAPX> _overFlow;
private byte[] _dataStream;
/**
* @deprecated Use {@link #PAPFormattedDiskPage()} instead
*/
public PAPFormattedDiskPage( byte[] dataStream )
{
_dataStream = dataStream;
this();
}
public PAPFormattedDiskPage()
{
}
/**
@ -73,20 +80,22 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* Creates a PAPFormattedDiskPage from a 512 byte array
*/
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
int offset, TextPieceTable tpt )
int offset, CharIndexTranslator translator )
{
super( documentStream, offset );
for ( int x = 0; x < _crun; x++ )
{
int startAt = getStart( x );
int endAt = getEnd( x );
int bytesStartAt = getStart( x );
int bytesEndAt = getEnd( x );
PAPX papx = new PAPX( startAt, endAt, tpt, getGrpprl( x ),
int charStartAt = translator.getCharIndex( bytesStartAt );
int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt );
PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
getParagraphHeight( x ), dataStream );
_papxList.add( papx );
}
_fkp = null;
_dataStream = dataStream;
}
/**
@ -159,10 +168,14 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* Creates a byte array representation of this data structure. Suitable for
* writing to a Word document.
*
* @param fcMin The file offset in the main stream where text begins.
* @param dataStream required if PAPX is too big to fit in FKP
*
* @return A byte array representing this data structure.
* @throws IOException
* if an I/O error occurs.
*/
protected byte[] toByteArray(CharIndexTranslator translator, int fcMin)
protected byte[] toByteArray( HWPFOutputStream dataStream,
CharIndexTranslator translator ) throws IOException
{
byte[] buf = new byte[512];
int size = _papxList.size();
@ -186,7 +199,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
}
// check to see if we have enough room for an FC, a BX, and the grpprl
// check to see if we have enough room for an FC, a BX, and the
// grpprl
// and the 1 byte size of the grpprl.
int addition = 0;
if ( !Arrays.equals( grpprl, lastGrpprl ) )
@ -200,7 +214,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
totalSize += addition;
// if size is uneven we will have to add one so the first grpprl falls
// if size is uneven we will have to add one so the first grpprl
// falls
// on a word boundary
if ( totalSize > 511 + ( index % 2 ) )
{
@ -245,33 +260,44 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
if ( grpprl.length > 488 )
{
// if so do we have storage at getHugeGrpprlOffset()
int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
if(hugeGrpprlOffset == -1) // then we have no storage...
{
throw new UnsupportedOperationException(
"This Paragraph has no dataStream storage.");
}
// int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
// if ( hugeGrpprlOffset == -1 ) // then we have no storage...
// {
// throw new UnsupportedOperationException(
// "This Paragraph has no dataStream storage." );
// }
// we have some storage...
// get the size of the existing storage
int maxHugeGrpprlSize = LittleEndian.getUShort(_dataStream, hugeGrpprlOffset);
if (maxHugeGrpprlSize < grpprl.length-2) { // grpprl.length-2 because we don't store the istd
throw new UnsupportedOperationException(
"This Paragraph's dataStream storage is too small.");
}
// int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
// hugeGrpprlOffset );
//
// if ( maxHugeGrpprlSize < grpprl.length - 2 )
// { // grpprl.length-2 because we don't store the istd
// throw new UnsupportedOperationException(
// "This Paragraph's dataStream storage is too small." );
// }
// store grpprl at hugeGrpprlOffset
System.arraycopy(grpprl, 2, _dataStream, hugeGrpprlOffset + 2,
grpprl.length - 2); // grpprl.length-2 because we don't store the istd
LittleEndian.putUShort(_dataStream, hugeGrpprlOffset, grpprl.length - 2);
// grpprl.length-2 because we don't store the istd
// System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
// 2,
// grpprl.length - 2 );
// LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
// grpprl.length - 2 );
byte[] hugePapx = new byte[grpprl.length - 2];
System.arraycopy( grpprl, 2, hugePapx, 0, grpprl.length - 2 );
int dataStreamOffset = dataStream.getOffset();
dataStream.write( hugePapx );
// grpprl = grpprl containing only a sprmPHugePapx2
int istd = LittleEndian.getUShort( grpprl, 0 );
grpprl = new byte[8];
LittleEndian.putUShort( grpprl, 0, istd );
LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
LittleEndian.putInt(grpprl, 4, hugeGrpprlOffset);
LittleEndian.putInt( grpprl, 4, dataStreamOffset );
}
boolean same = Arrays.equals( lastGrpprl, grpprl );
@ -280,16 +306,17 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
grpprlOffset -= ( grpprlOffset % 2 );
}
// LittleEndian.putInt( buf, fcOffset,
// papx.getStartBytes() );
// LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getStart() ) );
buf[bxOffset] = (byte) ( grpprlOffset / 2 );
System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
// refer to the section on PAPX in the spec. Places a size on the front
// of the PAPX. Has to do with how the grpprl stays on word
// boundaries.
/*
* refer to the section on PAPX in the spec. Places a size on the
* front of the PAPX. Has to do with how the grpprl stays on word
* boundaries.
*/
if ( !same )
{
int copyOffset = grpprlOffset;

View File

@ -22,6 +22,7 @@ import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.hwpf.usermodel.ParagraphProperties;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
/**
@ -33,11 +34,11 @@ import org.apache.poi.util.LittleEndian;
*
* @author Ryan Ackley
*/
@Internal
@SuppressWarnings( "deprecation" )
public final class PAPX extends BytePropertyNode<PAPX> {
private ParagraphHeight _phe;
private int _hugeGrpprlOffset = -1;
public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
{
@ -48,6 +49,17 @@ public final class PAPX extends BytePropertyNode<PAPX> {
_buf = buf;
}
public PAPX( int charStart, int charEnd, byte[] papx, ParagraphHeight phe,
byte[] dataStream )
{
super( charStart, charEnd, new SprmBuffer( papx, 2 ) );
_phe = phe;
SprmBuffer buf = findHuge( new SprmBuffer( papx, 2 ), dataStream );
if ( buf != null )
_buf = buf;
}
@Deprecated
public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
{
super(fcStart, fcEnd, translator, buf);
@ -57,13 +69,10 @@ public final class PAPX extends BytePropertyNode<PAPX> {
_buf = buf;
}
public PAPX( int charStart, int charEnd, SprmBuffer buf, byte[] dataStream )
public PAPX( int charStart, int charEnd, SprmBuffer buf )
{
super( charStart, charEnd, buf );
_phe = new ParagraphHeight();
buf = findHuge( buf, dataStream );
if ( buf != null )
_buf = buf;
}
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream)
@ -87,8 +96,6 @@ public final class PAPX extends BytePropertyNode<PAPX> {
// copy Grpprl from dataStream
System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2,
grpprlSize);
// save a pointer to where we got the huge Grpprl from
_hugeGrpprlOffset = hugeGrpprlOffset;
return new SprmBuffer(hugeGrpprl, 2);
}
}
@ -108,11 +115,6 @@ public final class PAPX extends BytePropertyNode<PAPX> {
return ((SprmBuffer)_buf).toByteArray();
}
public int getHugeGrpprlOffset()
{
return _hugeGrpprlOffset;
}
public short getIstd()
{
byte[] buf = getGrpprl();

View File

@ -50,7 +50,7 @@ public final class TestCHPBinTable
HWPFFileSystem fileSys = new HWPFFileSystem();
_cHPBinTable.writeTo(fileSys, 0);
_cHPBinTable.writeTo(fileSys, 0, fakeTPT);
ByteArrayOutputStream tableOut = fileSys.getStream("1Table");
ByteArrayOutputStream mainOut = fileSys.getStream("WordDocument");

View File

@ -53,7 +53,7 @@ public final class TestPAPBinTable extends TestCase
HWPFFileSystem fileSys = new HWPFFileSystem();
_pAPBinTable.writeTo( fileSys, 0, fakeTPT );
_pAPBinTable.writeTo( fileSys, fakeTPT );
ByteArrayOutputStream tableOut = fileSys.getStream( "1Table" );
ByteArrayOutputStream mainOut = fileSys.getStream( "WordDocument" );