mirror of https://github.com/apache/poi.git
add initial support for fast-saved files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
93ec152831
commit
fd8a518446
|
@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore
|
|||
|
||||
// Now load the rest of the properties, which need to be adjusted
|
||||
// for where text really begins
|
||||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
|
||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
|
||||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
|
||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
|
||||
|
||||
// Read FSPA and Escher information
|
||||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
||||
|
|
|
@ -29,6 +29,8 @@ import java.util.Set;
|
|||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.hwpf.sprm.SprmIterator;
|
||||
import org.apache.poi.hwpf.sprm.SprmOperation;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
|
@ -58,20 +60,21 @@ public class CHPBinTable
|
|||
* Constructor used to read a binTable in from a Word document.
|
||||
*
|
||||
* @deprecated Use
|
||||
* {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)}
|
||||
* {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
|
||||
* instead
|
||||
*/
|
||||
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
||||
int size, int fcMin, TextPieceTable tpt )
|
||||
{
|
||||
this( documentStream, tableStream, offset, size, tpt, true );
|
||||
this( documentStream, tableStream, offset, size, null, tpt, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor used to read a binTable in from a Word document.
|
||||
*/
|
||||
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
||||
int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
|
||||
int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
|
||||
boolean ignoreChpxWithoutTextPieces )
|
||||
{
|
||||
/*
|
||||
* Page 35:
|
||||
|
@ -105,6 +108,58 @@ public class CHPBinTable
|
|||
}
|
||||
}
|
||||
|
||||
if ( complexFileTable != null )
|
||||
{
|
||||
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
|
||||
|
||||
// adding CHPX from fast-saved SPRMs
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
{
|
||||
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
|
||||
if ( !prm.isComplex() )
|
||||
continue;
|
||||
int igrpprl = prm.getIgrpprl();
|
||||
|
||||
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
|
||||
{
|
||||
logger.log( POILogger.WARN, textPiece
|
||||
+ "'s PRM references to unknown grpprl" );
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean hasChp = false;
|
||||
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
|
||||
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
|
||||
.hasNext(); )
|
||||
{
|
||||
SprmOperation sprmOperation = iterator.next();
|
||||
if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
|
||||
{
|
||||
hasChp = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( hasChp )
|
||||
{
|
||||
SprmBuffer newSprmBuffer;
|
||||
try
|
||||
{
|
||||
newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
|
||||
}
|
||||
catch ( CloneNotSupportedException e )
|
||||
{
|
||||
// shall not happen
|
||||
throw new Error( e );
|
||||
}
|
||||
|
||||
CHPX chpx = new CHPX( textPiece.getStart(),
|
||||
textPiece.getEnd(), newSprmBuffer );
|
||||
_textRuns.add( chpx );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rebuild document paragraphs structure
|
||||
StringBuilder docText = new StringBuilder();
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
|
|
|
@ -18,9 +18,13 @@
|
|||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.hwpf.model.io.*;
|
||||
|
||||
public final class ComplexFileTable
|
||||
{
|
||||
|
@ -30,6 +34,8 @@ public final class ComplexFileTable
|
|||
|
||||
protected TextPieceTable _tpt;
|
||||
|
||||
private SprmBuffer[] _grpprls;
|
||||
|
||||
public ComplexFileTable()
|
||||
{
|
||||
_tpt = new TextPieceTable();
|
||||
|
@ -39,12 +45,20 @@ public final class ComplexFileTable
|
|||
{
|
||||
//reads the grpprls (prms) stored before the piece table. These contain data
|
||||
//for actual fast saved files
|
||||
List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
|
||||
while ( tableStream[offset] == GRPPRL_TYPE )
|
||||
{
|
||||
offset++;
|
||||
int skip = LittleEndian.getShort(tableStream, offset);
|
||||
offset += LittleEndian.SHORT_SIZE + skip;
|
||||
int size = LittleEndian.getShort( tableStream, offset );
|
||||
offset += LittleEndian.SHORT_SIZE;
|
||||
byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
|
||||
offset += size;
|
||||
|
||||
SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
|
||||
sprmBuffers.add( sprmBuffer );
|
||||
}
|
||||
this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
|
||||
|
||||
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
|
||||
{
|
||||
throw new IOException("The text piece table is corrupted");
|
||||
|
@ -59,6 +73,11 @@ public final class ComplexFileTable
|
|||
return _tpt;
|
||||
}
|
||||
|
||||
/**
 * Returns the SPRM buffers held in {@code _grpprls}.
 * <p>
 * The array is returned directly, without a defensive copy, so changes
 * made by the caller are visible to this object.
 * NOTE(review): the no-arg constructor shown in this view does not assign
 * {@code _grpprls}; confirm whether this method can return {@code null}.
 *
 * @return the internal array of grpprl SPRM buffers
 */
public SprmBuffer[] getGrpprls()
|
||||
{
|
||||
return _grpprls;
|
||||
}
|
||||
|
||||
public void writeTo(HWPFFileSystem sys)
|
||||
throws IOException
|
||||
{
|
||||
|
|
|
@ -26,6 +26,8 @@ import java.util.List;
|
|||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.hwpf.sprm.SprmIterator;
|
||||
import org.apache.poi.hwpf.sprm.SprmOperation;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
|
@ -62,12 +64,12 @@ public class PAPBinTable
|
|||
byte[] dataStream, int offset, int size, int fcMin,
|
||||
TextPieceTable tpt )
|
||||
{
|
||||
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
|
||||
this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
|
||||
}
|
||||
|
||||
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
||||
byte[] dataStream, int offset, int size, TextPieceTable tpt,
|
||||
boolean ignorePapxWithoutTextPieces )
|
||||
byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
|
||||
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
||||
{
|
||||
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
||||
this.tpt = tpt;
|
||||
|
@ -94,6 +96,50 @@ public class PAPBinTable
|
|||
}
|
||||
}
|
||||
|
||||
if ( complexFileTable != null )
|
||||
{
|
||||
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
|
||||
|
||||
// adding PAPX from fast-saved SPRMs
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
{
|
||||
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
|
||||
if ( !prm.isComplex() )
|
||||
continue;
|
||||
int igrpprl = prm.getIgrpprl();
|
||||
|
||||
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
|
||||
{
|
||||
logger.log( POILogger.WARN, textPiece
|
||||
+ "'s PRM references to unknown grpprl" );
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean hasPap = false;
|
||||
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
|
||||
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
|
||||
.hasNext(); )
|
||||
{
|
||||
SprmOperation sprmOperation = iterator.next();
|
||||
if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
|
||||
{
|
||||
hasPap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( hasPap )
|
||||
{
|
||||
SprmBuffer newSprmBuffer = new SprmBuffer(2);
|
||||
newSprmBuffer.append( sprmBuffer.toByteArray() );
|
||||
|
||||
PAPX papx = new PAPX( textPiece.getStart(),
|
||||
textPiece.getEnd(), newSprmBuffer, dataStream );
|
||||
_paragraphs.add( papx );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rebuild document paragraphs structure
|
||||
StringBuilder docText = new StringBuilder();
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
|
@ -144,11 +190,6 @@ public class PAPBinTable
|
|||
if ( fChar == 13 || fChar == 7 || fChar == 12 )
|
||||
break;
|
||||
}
|
||||
// if ( papx.getStart() <= charIndex && charIndex <
|
||||
// papx.getEnd() )
|
||||
// {
|
||||
// papxs.add( papx );
|
||||
// }
|
||||
}
|
||||
|
||||
if ( papxs.size() == 0 )
|
||||
|
@ -178,9 +219,20 @@ public class PAPBinTable
|
|||
}
|
||||
}
|
||||
|
||||
SprmBuffer sprmBuffer = new SprmBuffer( 2 );
|
||||
SprmBuffer sprmBuffer = null;
|
||||
for ( PAPX papx : papxs )
|
||||
{
|
||||
if ( sprmBuffer == null )
|
||||
try
|
||||
{
|
||||
sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
|
||||
}
|
||||
catch ( CloneNotSupportedException e )
|
||||
{
|
||||
// can't happen
|
||||
throw new Error( e );
|
||||
}
|
||||
else
|
||||
sprmBuffer.append( papx.getGrpprl(), 2 );
|
||||
}
|
||||
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
|||
Integer.valueOf( startAt ), "; ",
|
||||
Integer.valueOf( endAt ),
|
||||
") (bytes) doesn't have corresponding text pieces "
|
||||
+ "and will be skipped" );
|
||||
+ "and will be skipped\n\tSkipped SPRM: "
|
||||
+ new SprmBuffer( getGrpprl( x ), 2 ) );
|
||||
_papxList.add( null );
|
||||
continue;
|
||||
}
|
||||
|
@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
|||
return _papxList.get(index);
|
||||
}
|
||||
|
||||
/**
 * Returns a read-only view of the PAPX list of this page.
 * <p>
 * The result wraps {@code _papxList} with
 * {@link Collections#unmodifiableList(List)}; it is a view, not a copy.
 * Entries may be {@code null}: the constructor adds {@code null} for a
 * PAPX that is skipped because it has no corresponding text pieces.
 *
 * @return unmodifiable view of the PAPX entries
 */
public List<PAPX> getPAPXs()
|
||||
{
|
||||
return Collections.unmodifiableList( _papxList );
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the papx grpprl for the paragraph at index in this fkp.
|
||||
*
|
||||
|
|
|
@ -46,7 +46,7 @@ public final class TestCHPBinTable
|
|||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||
int fcMin = fib.getFcMin();
|
||||
|
||||
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false);
|
||||
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
|
||||
|
||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||
|
||||
|
@ -57,7 +57,7 @@ public final class TestCHPBinTable
|
|||
byte[] newTableStream = tableOut.toByteArray();
|
||||
byte[] newMainStream = mainOut.toByteArray();
|
||||
|
||||
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false);
|
||||
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
|
||||
|
||||
ArrayList oldTextRuns = _cHPBinTable._textRuns;
|
||||
ArrayList newTextRuns = newBinTable._textRuns;
|
||||
|
|
|
@ -40,7 +40,7 @@ public final class TestPAPBinTable
|
|||
byte[] mainStream = _hWPFDocFixture._mainStream;
|
||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||
|
||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
|
||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
|
||||
|
||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||
|
||||
|
@ -51,7 +51,7 @@ public final class TestPAPBinTable
|
|||
byte[] newTableStream = tableOut.toByteArray();
|
||||
byte[] newMainStream = mainOut.toByteArray();
|
||||
|
||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
|
||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
|
||||
|
||||
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
||||
ArrayList newTextRuns = newBinTable.getParagraphs();
|
||||
|
|
Loading…
Reference in New Issue