mirror of https://github.com/apache/poi.git
#61162 - En-/decryption support for HWPF
Decryption for Binary RC4 and CryptoAPI (... XOR is missing) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1797837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8909c066be
commit
1bcde5f6d4
|
@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable {
|
|||
NPOIFSFileSystem encPoifs = null;
|
||||
String step = "getting";
|
||||
try {
|
||||
if (encryptionInfo != null) {
|
||||
if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
|
||||
step = "getting encrypted";
|
||||
String encryptedStream = null;
|
||||
for (String s : encryptedStreamNames) {
|
||||
|
|
|
@ -32,7 +32,11 @@ public final class Biff8EncryptionKey {
|
|||
* @param password pass <code>null</code> to clear user password (and use default)
|
||||
*/
|
||||
public static void setCurrentUserPassword(String password) {
|
||||
_userPasswordTLS.set(password);
|
||||
if (password == null) {
|
||||
_userPasswordTLS.remove();
|
||||
} else {
|
||||
_userPasswordTLS.set(password);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable {
|
|||
} else if (
|
||||
2 <= versionMajor && versionMajor <= 4
|
||||
&& versionMinor == 2) {
|
||||
encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard;
|
||||
encryptionFlags = dis.readInt();
|
||||
encryptionMode = (
|
||||
preferredEncryptionMode == cryptoAPI
|
||||
|| !flagAES.isSet(encryptionFlags))
|
||||
? cryptoAPI : standard;
|
||||
} else if (
|
||||
versionMajor == agile.versionMajor
|
||||
&& versionMinor == agile.versionMinor){
|
||||
|
@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable {
|
|||
return encryptionMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true, if Document Summary / Summary are encrypted and stored in the {@code EncryptedStream} stream,
|
||||
* otherwise the Summaries aren't encrypted and located in their usual streams
|
||||
*/
|
||||
public boolean isDocPropsEncrypted() {
|
||||
return !flagDocProps.isSet(getEncryptionFlags());
|
||||
}
|
||||
|
||||
@Override
|
||||
public EncryptionInfo clone() throws CloneNotSupportedException {
|
||||
EncryptionInfo other = (EncryptionInfo)super.clone();
|
||||
|
|
|
@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
|
|||
super(stream, size, chunkSize);
|
||||
}
|
||||
|
||||
public BinaryRC4CipherInputStream(InputStream stream)
|
||||
public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos)
|
||||
throws GeneralSecurityException {
|
||||
super(stream, Integer.MAX_VALUE, chunkSize);
|
||||
super(stream, size, chunkSize, initialPos);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
|
|||
@Override
|
||||
public InputStream getDataStream(InputStream stream, int size, int initialPos)
|
||||
throws IOException, GeneralSecurityException {
|
||||
return new BinaryRC4CipherInputStream(stream);
|
||||
return new BinaryRC4CipherInputStream(stream, size, initialPos);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.poi.hwpf;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -25,9 +26,29 @@ import java.io.OutputStream;
|
|||
|
||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.model.*;
|
||||
import org.apache.poi.hwpf.model.BookmarksTables;
|
||||
import org.apache.poi.hwpf.model.CHPBinTable;
|
||||
import org.apache.poi.hwpf.model.ComplexFileTable;
|
||||
import org.apache.poi.hwpf.model.DocumentProperties;
|
||||
import org.apache.poi.hwpf.model.EscherRecordHolder;
|
||||
import org.apache.poi.hwpf.model.FSPADocumentPart;
|
||||
import org.apache.poi.hwpf.model.FSPATable;
|
||||
import org.apache.poi.hwpf.model.FieldsTables;
|
||||
import org.apache.poi.hwpf.model.FontTable;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.model.NoteType;
|
||||
import org.apache.poi.hwpf.model.NotesTables;
|
||||
import org.apache.poi.hwpf.model.PAPBinTable;
|
||||
import org.apache.poi.hwpf.model.PicturesTable;
|
||||
import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
|
||||
import org.apache.poi.hwpf.model.SavedByTable;
|
||||
import org.apache.poi.hwpf.model.SectionTable;
|
||||
import org.apache.poi.hwpf.model.SinglentonTextPiece;
|
||||
import org.apache.poi.hwpf.model.StyleSheet;
|
||||
import org.apache.poi.hwpf.model.SubdocumentType;
|
||||
import org.apache.poi.hwpf.model.TextPiece;
|
||||
import org.apache.poi.hwpf.model.TextPieceTable;
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.usermodel.Bookmarks;
|
||||
import org.apache.poi.hwpf.usermodel.BookmarksImpl;
|
||||
import org.apache.poi.hwpf.usermodel.Field;
|
||||
|
@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings;
|
|||
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.EntryUtils;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
/**
|
||||
|
@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
|
||||
|
||||
private static final String STREAM_DATA = "Data";
|
||||
private static final String STREAM_TABLE_0 = "0Table";
|
||||
private static final String STREAM_TABLE_1 = "1Table";
|
||||
|
||||
/** table stream buffer*/
|
||||
protected byte[] _tableStream;
|
||||
|
@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
}
|
||||
|
||||
// use the fib to determine the name of the table stream.
|
||||
String name = STREAM_TABLE_0;
|
||||
if (_fib.getFibBase().isFWhichTblStm())
|
||||
{
|
||||
name = STREAM_TABLE_1;
|
||||
}
|
||||
String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0;
|
||||
|
||||
// Grab the table stream.
|
||||
if (!directory.hasEntry(name)) {
|
||||
|
@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
}
|
||||
|
||||
// read in the table stream.
|
||||
InputStream is = directory.createDocumentInputStream(name);
|
||||
_tableStream = IOUtils.toByteArray(is);
|
||||
is.close();
|
||||
_tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE);
|
||||
|
||||
_fib.fillVariableFields(_mainStream, _tableStream);
|
||||
|
||||
// read in the data stream.
|
||||
InputStream dis = null;
|
||||
try {
|
||||
DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA);
|
||||
dis = directory.createDocumentInputStream(STREAM_DATA);
|
||||
_dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
|
||||
} catch(IOException e) {
|
||||
_dataStream = new byte[0];
|
||||
} finally {
|
||||
if (dis != null) {
|
||||
dis.close();
|
||||
}
|
||||
}
|
||||
_dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0];
|
||||
|
||||
// Get the cp of the start of text in the main stream
|
||||
// The latest spec doc says this is always zero!
|
||||
|
@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
*/
|
||||
boolean preserveBinTables = false;
|
||||
try {
|
||||
preserveBinTables = Boolean.parseBoolean( System
|
||||
.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
|
||||
preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
|
||||
} catch ( Exception exc ) {
|
||||
// ignore;
|
||||
}
|
||||
|
@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
*/
|
||||
boolean preserveTextTable = false;
|
||||
try {
|
||||
preserveTextTable = Boolean.parseBoolean( System
|
||||
.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
|
||||
preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
|
||||
} catch ( Exception exc ) {
|
||||
// ignore;
|
||||
}
|
||||
|
@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
|
||||
// initialize our streams for writing.
|
||||
HWPFFileSystem docSys = new HWPFFileSystem();
|
||||
HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
|
||||
HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
|
||||
ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
|
||||
ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
|
||||
//HWPFOutputStream dataStream = docSys.getStream("Data");
|
||||
int tableOffset = 0;
|
||||
|
||||
|
@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// it after we write everything else.
|
||||
byte[] placeHolder = new byte[fibSize];
|
||||
wordDocumentStream.write(placeHolder);
|
||||
int mainOffset = wordDocumentStream.getOffset();
|
||||
int mainOffset = wordDocumentStream.size();
|
||||
|
||||
// write out the StyleSheet.
|
||||
_fib.setFcStshf(tableOffset);
|
||||
_ss.writeTo(tableStream);
|
||||
_fib.setLcbStshf(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbStshf(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
// get fcMin and fcMac because we will be writing the actual text with the
|
||||
// complex table.
|
||||
|
@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// write out the Complex table, includes text.
|
||||
_fib.setFcClx(tableOffset);
|
||||
_cft.writeTo(wordDocumentStream, tableStream);
|
||||
_fib.setLcbClx(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
int fcMac = wordDocumentStream.getOffset();
|
||||
_fib.setLcbClx(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
int fcMac = wordDocumentStream.size();
|
||||
|
||||
/*
|
||||
* dop (document properties record) Written immediately after the end of
|
||||
|
@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// write out the DocumentProperties.
|
||||
_fib.setFcDop(tableOffset);
|
||||
_dop.writeTo(tableStream);
|
||||
_fib.setLcbDop(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbDop(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plcfBkmkf (table recording beginning CPs of bookmarks) Written
|
||||
|
@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
if ( _bookmarksTables != null )
|
||||
{
|
||||
_bookmarksTables.writePlcfBkmkf( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
if ( _bookmarksTables != null )
|
||||
{
|
||||
_bookmarksTables.writePlcfBkmkl( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// write out the CHPBinTable.
|
||||
_fib.setFcPlcfbteChpx(tableOffset);
|
||||
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
|
||||
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the
|
||||
|
@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// write out the PAPBinTable.
|
||||
_fib.setFcPlcfbtePapx(tableOffset);
|
||||
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
|
||||
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plcfendRef (endnote reference position table) Written immediately
|
||||
|
@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
*/
|
||||
_endnotesTables.writeRef( _fib, tableStream );
|
||||
_endnotesTables.writeTxt( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plcffld*** (table of field positions and statuses for annotation
|
||||
|
@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
if ( _fieldsTables != null )
|
||||
{
|
||||
_fieldsTables.write( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
*/
|
||||
_footnotesTables.writeRef( _fib, tableStream );
|
||||
_footnotesTables.writeTxt( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plcfsed (section table) Written immediately after the previously
|
||||
|
@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
// write out the SectionTable.
|
||||
_fib.setFcPlcfsed(tableOffset);
|
||||
_st.writeTo(wordDocumentStream, tableStream);
|
||||
_fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbPlcfsed(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
// write out the list tables
|
||||
if ( _lt != null )
|
||||
|
@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
* Specification; Page 25 of 210
|
||||
*/
|
||||
_lt.writeListDataTo( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
/*
|
||||
* plflfo (more list formats) Written immediately after the end of
|
||||
|
@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
* Specification; Page 26 of 210
|
||||
*/
|
||||
_lt.writeListOverridesTo( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
if ( _bookmarksTables != null )
|
||||
{
|
||||
_bookmarksTables.writeSttbfBkmk( _fib, tableStream );
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
{
|
||||
_fib.setFcSttbSavedBy(tableOffset);
|
||||
_sbt.writeTo(tableStream);
|
||||
_fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
|
||||
_fib.setLcbSttbSavedBy(tableStream.size() - tableOffset);
|
||||
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
// write out the revision mark authors table.
|
||||
|
@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore {
|
|||
{
|
||||
_fib.setFcSttbfRMark(tableOffset);
|
||||
_rmat.writeTo(tableStream);
|
||||
_fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
|
||||
_fib.setLcbSttbfRMark(tableStream.size() - tableOffset);
|
||||
|
||||
tableOffset = tableStream.getOffset();
|
||||
tableOffset = tableStream.size();
|
||||
}
|
||||
|
||||
// write out the FontTable.
|
||||
_fib.setFcSttbfffn(tableOffset);
|
||||
_ft.writeTo(tableStream);
|
||||
_fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
|
||||
tableOffset = tableStream.getOffset();
|
||||
_fib.setLcbSttbfffn(tableStream.size() - tableOffset);
|
||||
tableOffset = tableStream.size();
|
||||
|
||||
// set some variables in the FileInformationBlock.
|
||||
_fib.getFibBase().setFcMin(fcMin);
|
||||
_fib.getFibBase().setFcMac(fcMac);
|
||||
_fib.setCbMac(wordDocumentStream.getOffset());
|
||||
_fib.setCbMac(wordDocumentStream.size());
|
||||
|
||||
// make sure that the table, doc and data streams use big blocks.
|
||||
byte[] mainBuf = wordDocumentStream.toByteArray();
|
||||
|
|
|
@ -17,13 +17,19 @@
|
|||
|
||||
package org.apache.poi.hwpf;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.security.GeneralSecurityException;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDocument;
|
||||
import org.apache.poi.hpsf.PropertySet;
|
||||
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
|
||||
import org.apache.poi.hwpf.model.CHPBinTable;
|
||||
import org.apache.poi.hwpf.model.FibBase;
|
||||
import org.apache.poi.hwpf.model.FileInformationBlock;
|
||||
import org.apache.poi.hwpf.model.FontTable;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
|
@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable;
|
|||
import org.apache.poi.hwpf.usermodel.ObjectPoolImpl;
|
||||
import org.apache.poi.hwpf.usermodel.ObjectsPool;
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.poifs.crypt.ChunkedCipherInputStream;
|
||||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.poifs.crypt.EncryptionMode;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.BoundedInputStream;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndianByteArrayInputStream;
|
||||
|
||||
|
||||
/**
|
||||
* This class holds much of the core of a Word document, but
|
||||
* without some of the table structure information.
|
||||
* You generally want to work with one of
|
||||
* {@link HWPFDocument} or {@link HWPFOldDocument}
|
||||
* {@link HWPFDocument} or {@link HWPFOldDocument}
|
||||
*/
|
||||
public abstract class HWPFDocumentCore extends POIDocument
|
||||
{
|
||||
public abstract class HWPFDocumentCore extends POIDocument {
|
||||
protected static final String STREAM_OBJECT_POOL = "ObjectPool";
|
||||
protected static final String STREAM_WORD_DOCUMENT = "WordDocument";
|
||||
protected static final String STREAM_TABLE_0 = "0Table";
|
||||
protected static final String STREAM_TABLE_1 = "1Table";
|
||||
|
||||
/** Holds OLE2 objects */
|
||||
protected ObjectPoolImpl _objectPool;
|
||||
private static final int FIB_BASE_LEN = 68;
|
||||
|
||||
/** The FIB */
|
||||
protected FileInformationBlock _fib;
|
||||
/** Holds OLE2 objects */
|
||||
protected ObjectPoolImpl _objectPool;
|
||||
|
||||
/** Holds styles for this document.*/
|
||||
protected StyleSheet _ss;
|
||||
/** The FIB */
|
||||
protected FileInformationBlock _fib;
|
||||
|
||||
/** Contains formatting properties for text*/
|
||||
protected CHPBinTable _cbt;
|
||||
/** Holds styles for this document.*/
|
||||
protected StyleSheet _ss;
|
||||
|
||||
/** Contains formatting properties for paragraphs*/
|
||||
protected PAPBinTable _pbt;
|
||||
/** Contains formatting properties for text*/
|
||||
protected CHPBinTable _cbt;
|
||||
|
||||
/** Contains formatting properties for sections.*/
|
||||
protected SectionTable _st;
|
||||
/** Contains formatting properties for paragraphs*/
|
||||
protected PAPBinTable _pbt;
|
||||
|
||||
/** Holds fonts for this document.*/
|
||||
protected FontTable _ft;
|
||||
/** Contains formatting properties for sections.*/
|
||||
protected SectionTable _st;
|
||||
|
||||
/** Hold list tables */
|
||||
protected ListTables _lt;
|
||||
/** Holds fonts for this document.*/
|
||||
protected FontTable _ft;
|
||||
|
||||
/** main document stream buffer*/
|
||||
protected byte[] _mainStream;
|
||||
/** Hold list tables */
|
||||
protected ListTables _lt;
|
||||
|
||||
protected HWPFDocumentCore()
|
||||
{
|
||||
super((DirectoryNode)null);
|
||||
}
|
||||
/** main document stream buffer*/
|
||||
protected byte[] _mainStream;
|
||||
|
||||
/**
|
||||
* Takes an InputStream, verifies that it's not RTF or PDF, builds a
|
||||
* POIFSFileSystem from it, and returns that.
|
||||
*/
|
||||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
|
||||
// Open a PushbackInputStream, so we can peek at the first few bytes
|
||||
PushbackInputStream pis = new PushbackInputStream(istream,6);
|
||||
byte[] first6 = IOUtils.toByteArray(pis, 6);
|
||||
private EncryptionInfo _encryptionInfo;
|
||||
|
||||
// Does it start with {\rtf ? If so, it's really RTF
|
||||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
|
||||
&& first6[3] == 't' && first6[4] == 'f') {
|
||||
throw new IllegalArgumentException("The document is really a RTF file");
|
||||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
|
||||
throw new IllegalArgumentException("The document is really a PDF file");
|
||||
}
|
||||
protected HWPFDocumentCore() {
|
||||
super((DirectoryNode)null);
|
||||
}
|
||||
|
||||
// OK, so it's neither RTF nor PDF
|
||||
// Open a POIFSFileSystem on the (pushed back) stream
|
||||
pis.unread(first6);
|
||||
return new POIFSFileSystem(pis);
|
||||
}
|
||||
/**
|
||||
* Takes an InputStream, verifies that it's not RTF or PDF, builds a
|
||||
* POIFSFileSystem from it, and returns that.
|
||||
*/
|
||||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
|
||||
// Open a PushbackInputStream, so we can peek at the first few bytes
|
||||
PushbackInputStream pis = new PushbackInputStream(istream,6);
|
||||
byte[] first6 = IOUtils.toByteArray(pis, 6);
|
||||
|
||||
/**
|
||||
* This constructor loads a Word document from an InputStream.
|
||||
*
|
||||
* @param istream The InputStream that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in InputStream.
|
||||
*/
|
||||
public HWPFDocumentCore(InputStream istream) throws IOException
|
||||
{
|
||||
//do Ole stuff
|
||||
this( verifyAndBuildPOIFS(istream) );
|
||||
}
|
||||
// Does it start with {\rtf ? If so, it's really RTF
|
||||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
|
||||
&& first6[3] == 't' && first6[4] == 'f') {
|
||||
throw new IllegalArgumentException("The document is really a RTF file");
|
||||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
|
||||
throw new IllegalArgumentException("The document is really a PDF file");
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor loads a Word document from a POIFSFileSystem
|
||||
*
|
||||
* @param pfilesystem The POIFSFileSystem that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in POIFSFileSystem.
|
||||
*/
|
||||
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException
|
||||
{
|
||||
this(pfilesystem.getRoot());
|
||||
}
|
||||
// OK, so it's neither RTF nor PDF
|
||||
// Open a POIFSFileSystem on the (pushed back) stream
|
||||
pis.unread(first6);
|
||||
return new POIFSFileSystem(pis);
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor loads a Word document from a specific point
|
||||
* in a POIFSFileSystem, probably not the default.
|
||||
* Used typically to open embedded documents.
|
||||
*
|
||||
* @param directory The DirectoryNode that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in POIFSFileSystem.
|
||||
*/
|
||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
|
||||
// Sort out the hpsf properties
|
||||
super(directory);
|
||||
/**
|
||||
* This constructor loads a Word document from an InputStream.
|
||||
*
|
||||
* @param istream The InputStream that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in InputStream.
|
||||
*/
|
||||
public HWPFDocumentCore(InputStream istream) throws IOException {
|
||||
//do Ole stuff
|
||||
this( verifyAndBuildPOIFS(istream) );
|
||||
}
|
||||
|
||||
// read in the main stream.
|
||||
DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument");
|
||||
DocumentInputStream dis = null;
|
||||
try {
|
||||
dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT);
|
||||
_mainStream = IOUtils.toByteArray(dis, documentProps.getSize());
|
||||
} finally {
|
||||
if (dis != null) {
|
||||
dis.close();
|
||||
/**
|
||||
* This constructor loads a Word document from a POIFSFileSystem
|
||||
*
|
||||
* @param pfilesystem The POIFSFileSystem that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in POIFSFileSystem.
|
||||
*/
|
||||
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
|
||||
this(pfilesystem.getRoot());
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor loads a Word document from a specific point
|
||||
* in a POIFSFileSystem, probably not the default.
|
||||
* Used typically to open embedded documents.
|
||||
*
|
||||
* @param directory The DirectoryNode that contains the Word document.
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in POIFSFileSystem.
|
||||
*/
|
||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
|
||||
// Sort out the hpsf properties
|
||||
super(directory);
|
||||
|
||||
// read in the main stream.
|
||||
_mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE);
|
||||
_fib = new FileInformationBlock(_mainStream);
|
||||
|
||||
DirectoryEntry objectPoolEntry = null;
|
||||
if (directory.hasEntry(STREAM_OBJECT_POOL)) {
|
||||
objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL);
|
||||
}
|
||||
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a given named property entry, either return it or null if
|
||||
* if it wasn't found
|
||||
*
|
||||
* @param setName The property to read
|
||||
* @return The value of the given property or null if it wasn't found.
|
||||
*/
|
||||
@Override
|
||||
protected PropertySet getPropertySet(String setName) {
|
||||
EncryptionInfo ei;
|
||||
try {
|
||||
ei = getEncryptionInfo();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return (ei == null)
|
||||
? super.getPropertySet(setName)
|
||||
: super.getPropertySet(setName, ei);
|
||||
}
|
||||
|
||||
protected EncryptionInfo getEncryptionInfo() throws IOException {
|
||||
if (_encryptionInfo != null) {
|
||||
return _encryptionInfo;
|
||||
}
|
||||
|
||||
// Create our FIB, and check for the doc being encrypted
|
||||
byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN);
|
||||
FibBase fibBase = new FibBase( fibBaseBytes, 0 );
|
||||
if (!fibBase.isFEncrypted()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0;
|
||||
byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey());
|
||||
LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream);
|
||||
EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null;
|
||||
EncryptionInfo ei = new EncryptionInfo(leis, em);
|
||||
Decryptor dec = ei.getDecryptor();
|
||||
dec.setChunkSize(512);
|
||||
try {
|
||||
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
||||
if (pass == null) {
|
||||
pass = Decryptor.DEFAULT_PASSWORD;
|
||||
}
|
||||
if (!dec.verifyPassword(pass)) {
|
||||
throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPasswort() to set password before opening");
|
||||
}
|
||||
} catch (GeneralSecurityException e) {
|
||||
throw new IOException(e.getMessage(), e);
|
||||
}
|
||||
_encryptionInfo = ei;
|
||||
return ei;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available,
|
||||
* decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try
|
||||
* to decrypt the bytes
|
||||
*
|
||||
* @param name the name of the stream
|
||||
* @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption
|
||||
* @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes
|
||||
* @return the read bytes
|
||||
* @throws IOException if the stream can't be found
|
||||
*/
|
||||
protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException {
|
||||
DirectoryNode dir = getDirectory();
|
||||
DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name);
|
||||
DocumentInputStream dis = dir.createDocumentInputStream(documentProps);
|
||||
EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null;
|
||||
int streamSize = documentProps.getSize();
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len));
|
||||
|
||||
InputStream is = dis;
|
||||
try {
|
||||
if (ei != null) {
|
||||
try {
|
||||
Decryptor dec = ei.getDecryptor();
|
||||
is = dec.getDataStream(dis, streamSize, 0);
|
||||
if (encryptionOffset > 0) {
|
||||
ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is;
|
||||
byte plain[] = new byte[encryptionOffset];
|
||||
cis.readPlain(plain, 0, encryptionOffset);
|
||||
bos.write(plain);
|
||||
}
|
||||
} catch (GeneralSecurityException e) {
|
||||
throw new IOException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
// This simplifies a few combinations, so we actually always try to copy len bytes
|
||||
// regardless if encryptionOffset is greater than 0
|
||||
if (len < Integer.MAX_VALUE) {
|
||||
is = new BoundedInputStream(is, len);
|
||||
}
|
||||
IOUtils.copy(is, bos);
|
||||
return bos.toByteArray();
|
||||
} finally {
|
||||
IOUtils.closeQuietly(is);
|
||||
IOUtils.closeQuietly(dis);
|
||||
}
|
||||
}
|
||||
|
||||
// Create our FIB, and check for the doc being encrypted
|
||||
_fib = new FileInformationBlock(_mainStream);
|
||||
|
||||
DirectoryEntry objectPoolEntry;
|
||||
try {
|
||||
objectPoolEntry = (DirectoryEntry) directory
|
||||
.getEntry(STREAM_OBJECT_POOL);
|
||||
} catch (FileNotFoundException exc) {
|
||||
objectPoolEntry = null;
|
||||
}
|
||||
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Returns the range which covers the whole of the document, but excludes
|
||||
* any headers and footers.
|
||||
*/
|
||||
|
@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument
|
|||
@Internal
|
||||
public abstract StringBuilder getText();
|
||||
|
||||
public CHPBinTable getCharacterTable()
|
||||
{
|
||||
return _cbt;
|
||||
}
|
||||
public CHPBinTable getCharacterTable() {
|
||||
return _cbt;
|
||||
}
|
||||
|
||||
public PAPBinTable getParagraphTable()
|
||||
{
|
||||
return _pbt;
|
||||
}
|
||||
public PAPBinTable getParagraphTable() {
|
||||
return _pbt;
|
||||
}
|
||||
|
||||
public SectionTable getSectionTable()
|
||||
{
|
||||
return _st;
|
||||
}
|
||||
public SectionTable getSectionTable() {
|
||||
return _st;
|
||||
}
|
||||
|
||||
public StyleSheet getStyleSheet()
|
||||
{
|
||||
return _ss;
|
||||
}
|
||||
public StyleSheet getStyleSheet() {
|
||||
return _ss;
|
||||
}
|
||||
|
||||
public ListTables getListTables()
|
||||
{
|
||||
return _lt;
|
||||
}
|
||||
public ListTables getListTables() {
|
||||
return _lt;
|
||||
}
|
||||
|
||||
public FontTable getFontTable()
|
||||
{
|
||||
return _ft;
|
||||
}
|
||||
public FontTable getFontTable() {
|
||||
return _ft;
|
||||
}
|
||||
|
||||
public FileInformationBlock getFileInformationBlock()
|
||||
{
|
||||
return _fib;
|
||||
}
|
||||
public FileInformationBlock getFileInformationBlock() {
|
||||
return _fib;
|
||||
}
|
||||
|
||||
public ObjectsPool getObjectsPool()
|
||||
{
|
||||
public ObjectsPool getObjectsPool() {
|
||||
return _objectPool;
|
||||
}
|
||||
|
||||
|
@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument
|
|||
public byte[] getMainStream() {
|
||||
return _mainStream;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.runners.Parameterized.Parameter;
|
||||
import org.junit.runners.Parameterized.Parameters;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class HWPFTestEncryption {
|
||||
@AfterClass
|
||||
public static void clearPass() {
|
||||
Biff8EncryptionKey.setCurrentUserPassword(null);
|
||||
}
|
||||
|
||||
@Parameter(value = 0)
|
||||
public String file;
|
||||
|
||||
@Parameter(value = 1)
|
||||
public String password;
|
||||
|
||||
@Parameter(value = 2)
|
||||
public String expected;
|
||||
|
||||
@Parameters(name="{0}")
|
||||
public static Collection<String[]> data() {
|
||||
return Arrays.asList(
|
||||
new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." },
|
||||
new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" }
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extract() throws IOException {
|
||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||
HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file);
|
||||
WordExtractor we = new WordExtractor(docD);
|
||||
String actual = we.getText().trim();
|
||||
assertEquals(expected, actual);
|
||||
we.close();
|
||||
docD.close();
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue