mirror of https://github.com/apache/poi.git
add initial support for fast-saved files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
93ec152831
commit
fd8a518446
|
@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore
|
||||||
|
|
||||||
// Now load the rest of the properties, which need to be adjusted
|
// Now load the rest of the properties, which need to be adjusted
|
||||||
// for where text really begin
|
// for where text really begin
|
||||||
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
|
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
|
||||||
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
|
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
|
||||||
|
|
||||||
// Read FSPA and Escher information
|
// Read FSPA and Escher information
|
||||||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
||||||
|
|
|
@ -29,6 +29,8 @@ import java.util.Set;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmIterator;
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmOperation;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
@ -58,20 +60,21 @@ public class CHPBinTable
|
||||||
* Constructor used to read a binTable in from a Word document.
|
* Constructor used to read a binTable in from a Word document.
|
||||||
*
|
*
|
||||||
* @deprecated Use
|
* @deprecated Use
|
||||||
* {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)}
|
* {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
|
||||||
* instead
|
* instead
|
||||||
*/
|
*/
|
||||||
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
||||||
int size, int fcMin, TextPieceTable tpt )
|
int size, int fcMin, TextPieceTable tpt )
|
||||||
{
|
{
|
||||||
this( documentStream, tableStream, offset, size, tpt, true );
|
this( documentStream, tableStream, offset, size, null, tpt, true );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor used to read a binTable in from a Word document.
|
* Constructor used to read a binTable in from a Word document.
|
||||||
*/
|
*/
|
||||||
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
|
||||||
int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
|
int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
|
||||||
|
boolean ignoreChpxWithoutTextPieces )
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Page 35:
|
* Page 35:
|
||||||
|
@ -105,6 +108,58 @@ public class CHPBinTable
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( complexFileTable != null )
|
||||||
|
{
|
||||||
|
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
|
||||||
|
|
||||||
|
// adding CHPX from fast-saved SPRMs
|
||||||
|
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||||
|
{
|
||||||
|
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
|
||||||
|
if ( !prm.isComplex() )
|
||||||
|
continue;
|
||||||
|
int igrpprl = prm.getIgrpprl();
|
||||||
|
|
||||||
|
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
|
||||||
|
{
|
||||||
|
logger.log( POILogger.WARN, textPiece
|
||||||
|
+ "'s PRM references to unknown grpprl" );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean hasChp = false;
|
||||||
|
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
|
||||||
|
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
|
||||||
|
.hasNext(); )
|
||||||
|
{
|
||||||
|
SprmOperation sprmOperation = iterator.next();
|
||||||
|
if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
|
||||||
|
{
|
||||||
|
hasChp = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hasChp )
|
||||||
|
{
|
||||||
|
SprmBuffer newSprmBuffer;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
|
||||||
|
}
|
||||||
|
catch ( CloneNotSupportedException e )
|
||||||
|
{
|
||||||
|
// shall not happen
|
||||||
|
throw new Error( e );
|
||||||
|
}
|
||||||
|
|
||||||
|
CHPX chpx = new CHPX( textPiece.getStart(),
|
||||||
|
textPiece.getEnd(), newSprmBuffer );
|
||||||
|
_textRuns.add( chpx );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// rebuild document paragraphs structure
|
// rebuild document paragraphs structure
|
||||||
StringBuilder docText = new StringBuilder();
|
StringBuilder docText = new StringBuilder();
|
||||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||||
|
|
|
@ -18,9 +18,13 @@
|
||||||
package org.apache.poi.hwpf.model;
|
package org.apache.poi.hwpf.model;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||||
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.hwpf.model.io.*;
|
|
||||||
|
|
||||||
public final class ComplexFileTable
|
public final class ComplexFileTable
|
||||||
{
|
{
|
||||||
|
@ -30,6 +34,8 @@ public final class ComplexFileTable
|
||||||
|
|
||||||
protected TextPieceTable _tpt;
|
protected TextPieceTable _tpt;
|
||||||
|
|
||||||
|
private SprmBuffer[] _grpprls;
|
||||||
|
|
||||||
public ComplexFileTable()
|
public ComplexFileTable()
|
||||||
{
|
{
|
||||||
_tpt = new TextPieceTable();
|
_tpt = new TextPieceTable();
|
||||||
|
@ -39,12 +45,20 @@ public final class ComplexFileTable
|
||||||
{
|
{
|
||||||
//skips through the prms before we reach the piece table. These contain data
|
//reads the grpprls stored before the piece table. These contain data
|
||||||
//for actual fast saved files
|
//for actual fast saved files
|
||||||
while (tableStream[offset] == GRPPRL_TYPE)
|
List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
|
||||||
{
|
while ( tableStream[offset] == GRPPRL_TYPE )
|
||||||
offset++;
|
{
|
||||||
int skip = LittleEndian.getShort(tableStream, offset);
|
offset++;
|
||||||
offset += LittleEndian.SHORT_SIZE + skip;
|
int size = LittleEndian.getShort( tableStream, offset );
|
||||||
}
|
offset += LittleEndian.SHORT_SIZE;
|
||||||
|
byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
|
||||||
|
offset += size;
|
||||||
|
|
||||||
|
SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
|
||||||
|
sprmBuffers.add( sprmBuffer );
|
||||||
|
}
|
||||||
|
this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
|
||||||
|
|
||||||
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
|
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
|
||||||
{
|
{
|
||||||
throw new IOException("The text piece table is corrupted");
|
throw new IOException("The text piece table is corrupted");
|
||||||
|
@ -59,6 +73,11 @@ public final class ComplexFileTable
|
||||||
return _tpt;
|
return _tpt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SprmBuffer[] getGrpprls()
|
||||||
|
{
|
||||||
|
return _grpprls;
|
||||||
|
}
|
||||||
|
|
||||||
public void writeTo(HWPFFileSystem sys)
|
public void writeTo(HWPFFileSystem sys)
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
|
|
|
@ -26,6 +26,8 @@ import java.util.List;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmIterator;
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmOperation;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
@ -62,12 +64,12 @@ public class PAPBinTable
|
||||||
byte[] dataStream, int offset, int size, int fcMin,
|
byte[] dataStream, int offset, int size, int fcMin,
|
||||||
TextPieceTable tpt )
|
TextPieceTable tpt )
|
||||||
{
|
{
|
||||||
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
|
this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
|
||||||
}
|
}
|
||||||
|
|
||||||
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
||||||
byte[] dataStream, int offset, int size, TextPieceTable tpt,
|
byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
|
||||||
boolean ignorePapxWithoutTextPieces )
|
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
|
||||||
{
|
{
|
||||||
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
|
||||||
this.tpt = tpt;
|
this.tpt = tpt;
|
||||||
|
@ -94,6 +96,50 @@ public class PAPBinTable
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( complexFileTable != null )
|
||||||
|
{
|
||||||
|
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
|
||||||
|
|
||||||
|
// adding PAPX from fast-saved SPRMs
|
||||||
|
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||||
|
{
|
||||||
|
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
|
||||||
|
if ( !prm.isComplex() )
|
||||||
|
continue;
|
||||||
|
int igrpprl = prm.getIgrpprl();
|
||||||
|
|
||||||
|
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
|
||||||
|
{
|
||||||
|
logger.log( POILogger.WARN, textPiece
|
||||||
|
+ "'s PRM references to unknown grpprl" );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean hasPap = false;
|
||||||
|
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
|
||||||
|
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
|
||||||
|
.hasNext(); )
|
||||||
|
{
|
||||||
|
SprmOperation sprmOperation = iterator.next();
|
||||||
|
if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
|
||||||
|
{
|
||||||
|
hasPap = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hasPap )
|
||||||
|
{
|
||||||
|
SprmBuffer newSprmBuffer = new SprmBuffer(2);
|
||||||
|
newSprmBuffer.append( sprmBuffer.toByteArray() );
|
||||||
|
|
||||||
|
PAPX papx = new PAPX( textPiece.getStart(),
|
||||||
|
textPiece.getEnd(), newSprmBuffer, dataStream );
|
||||||
|
_paragraphs.add( papx );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// rebuild document paragraphs structure
|
// rebuild document paragraphs structure
|
||||||
StringBuilder docText = new StringBuilder();
|
StringBuilder docText = new StringBuilder();
|
||||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||||
|
@ -144,11 +190,6 @@ public class PAPBinTable
|
||||||
if ( fChar == 13 || fChar == 7 || fChar == 12 )
|
if ( fChar == 13 || fChar == 7 || fChar == 12 )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// if ( papx.getStart() <= charIndex && charIndex <
|
|
||||||
// papx.getEnd() )
|
|
||||||
// {
|
|
||||||
// papxs.add( papx );
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( papxs.size() == 0 )
|
if ( papxs.size() == 0 )
|
||||||
|
@ -178,10 +219,21 @@ public class PAPBinTable
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SprmBuffer sprmBuffer = new SprmBuffer( 2 );
|
SprmBuffer sprmBuffer = null;
|
||||||
for ( PAPX papx : papxs )
|
for ( PAPX papx : papxs )
|
||||||
{
|
{
|
||||||
sprmBuffer.append( papx.getGrpprl(), 2 );
|
if ( sprmBuffer == null )
|
||||||
|
try
|
||||||
|
{
|
||||||
|
sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
|
||||||
|
}
|
||||||
|
catch ( CloneNotSupportedException e )
|
||||||
|
{
|
||||||
|
// can't happen
|
||||||
|
throw new Error( e );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
sprmBuffer.append( papx.getGrpprl(), 2 );
|
||||||
}
|
}
|
||||||
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
|
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
|
||||||
dataStream );
|
dataStream );
|
||||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
import org.apache.poi.util.POILogger;
|
import org.apache.poi.util.POILogger;
|
||||||
|
@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
||||||
Integer.valueOf( startAt ), "; ",
|
Integer.valueOf( startAt ), "; ",
|
||||||
Integer.valueOf( endAt ),
|
Integer.valueOf( endAt ),
|
||||||
") (bytes) doesn't have corresponding text pieces "
|
") (bytes) doesn't have corresponding text pieces "
|
||||||
+ "and will be skipped" );
|
+ "and will be skipped\n\tSkipped SPRM: "
|
||||||
|
+ new SprmBuffer( getGrpprl( x ), 2 ) );
|
||||||
_papxList.add( null );
|
_papxList.add( null );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
||||||
return _papxList.get(index);
|
return _papxList.get(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<PAPX> getPAPXs()
|
||||||
|
{
|
||||||
|
return Collections.unmodifiableList( _papxList );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the papx grpprl for the paragraph at index in this fkp.
|
* Gets the papx grpprl for the paragraph at index in this fkp.
|
||||||
*
|
*
|
||||||
|
|
|
@ -46,7 +46,7 @@ public final class TestCHPBinTable
|
||||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||||
int fcMin = fib.getFcMin();
|
int fcMin = fib.getFcMin();
|
||||||
|
|
||||||
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false);
|
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
|
||||||
|
|
||||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ public final class TestCHPBinTable
|
||||||
byte[] newTableStream = tableOut.toByteArray();
|
byte[] newTableStream = tableOut.toByteArray();
|
||||||
byte[] newMainStream = mainOut.toByteArray();
|
byte[] newMainStream = mainOut.toByteArray();
|
||||||
|
|
||||||
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false);
|
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
|
||||||
|
|
||||||
ArrayList oldTextRuns = _cHPBinTable._textRuns;
|
ArrayList oldTextRuns = _cHPBinTable._textRuns;
|
||||||
ArrayList newTextRuns = newBinTable._textRuns;
|
ArrayList newTextRuns = newBinTable._textRuns;
|
||||||
|
|
|
@ -40,7 +40,7 @@ public final class TestPAPBinTable
|
||||||
byte[] mainStream = _hWPFDocFixture._mainStream;
|
byte[] mainStream = _hWPFDocFixture._mainStream;
|
||||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||||
|
|
||||||
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
|
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
|
||||||
|
|
||||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ public final class TestPAPBinTable
|
||||||
byte[] newTableStream = tableOut.toByteArray();
|
byte[] newTableStream = tableOut.toByteArray();
|
||||||
byte[] newMainStream = mainOut.toByteArray();
|
byte[] newMainStream = mainOut.toByteArray();
|
||||||
|
|
||||||
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
|
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
|
||||||
|
|
||||||
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
|
||||||
ArrayList newTextRuns = newBinTable.getParagraphs();
|
ArrayList newTextRuns = newBinTable.getParagraphs();
|
||||||
|
|
Loading…
Reference in New Issue