#61162 - En-/decryption support for HWPF

Decryption for Binary RC4 and CryptoAPI (XOR obfuscation is not supported yet)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1797837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2017-06-06 22:21:11 +00:00
parent 8909c066be
commit 1bcde5f6d4
9 changed files with 378 additions and 200 deletions

View File

@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable {
NPOIFSFileSystem encPoifs = null;
String step = "getting";
try {
if (encryptionInfo != null) {
if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
step = "getting encrypted";
String encryptedStream = null;
for (String s : encryptedStreamNames) {

View File

@ -32,7 +32,11 @@ public final class Biff8EncryptionKey {
* @param password pass <code>null</code> to clear user password (and use default)
*/
public static void setCurrentUserPassword(String password) {
_userPasswordTLS.set(password);
if (password == null) {
_userPasswordTLS.remove();
} else {
_userPasswordTLS.set(password);
}
}
/**

View File

@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable {
} else if (
2 <= versionMajor && versionMajor <= 4
&& versionMinor == 2) {
encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard;
encryptionFlags = dis.readInt();
encryptionMode = (
preferredEncryptionMode == cryptoAPI
|| !flagAES.isSet(encryptionFlags))
? cryptoAPI : standard;
} else if (
versionMajor == agile.versionMajor
&& versionMinor == agile.versionMinor){
@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable {
return encryptionMode;
}
/**
 * Reports where the Document Summary / Summary property sets live.
 *
 * @return {@code true} if the Document Summary / Summary are encrypted and stored in the
 *         {@code EncryptedStream} stream; {@code false} if the summaries aren't encrypted
 *         and are located in their usual streams
 */
public boolean isDocPropsEncrypted() {
    // the property sets count as encrypted exactly when the flagDocProps bit is NOT set
    final int flags = getEncryptionFlags();
    final boolean docPropsFlagSet = flagDocProps.isSet(flags);
    return !docPropsFlagSet;
}
@Override
public EncryptionInfo clone() throws CloneNotSupportedException {
EncryptionInfo other = (EncryptionInfo)super.clone();

View File

@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
super(stream, size, chunkSize);
}
public BinaryRC4CipherInputStream(InputStream stream)
public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos)
throws GeneralSecurityException {
super(stream, Integer.MAX_VALUE, chunkSize);
super(stream, size, chunkSize, initialPos);
}
}
@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
@Override
public InputStream getDataStream(InputStream stream, int size, int initialPos)
throws IOException, GeneralSecurityException {
return new BinaryRC4CipherInputStream(stream);
return new BinaryRC4CipherInputStream(stream, size, initialPos);
}

View File

@ -18,6 +18,7 @@
package org.apache.poi.hwpf;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@ -25,9 +26,29 @@ import java.io.OutputStream;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.model.*;
import org.apache.poi.hwpf.model.BookmarksTables;
import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.ComplexFileTable;
import org.apache.poi.hwpf.model.DocumentProperties;
import org.apache.poi.hwpf.model.EscherRecordHolder;
import org.apache.poi.hwpf.model.FSPADocumentPart;
import org.apache.poi.hwpf.model.FSPATable;
import org.apache.poi.hwpf.model.FieldsTables;
import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.model.NoteType;
import org.apache.poi.hwpf.model.NotesTables;
import org.apache.poi.hwpf.model.PAPBinTable;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
import org.apache.poi.hwpf.model.SavedByTable;
import org.apache.poi.hwpf.model.SectionTable;
import org.apache.poi.hwpf.model.SinglentonTextPiece;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.model.SubdocumentType;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.BookmarksImpl;
import org.apache.poi.hwpf.usermodel.Field;
@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings;
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
/**
@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore {
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
private static final String STREAM_DATA = "Data";
private static final String STREAM_TABLE_0 = "0Table";
private static final String STREAM_TABLE_1 = "1Table";
/** table stream buffer*/
protected byte[] _tableStream;
@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
}
// use the fib to determine the name of the table stream.
String name = STREAM_TABLE_0;
if (_fib.getFibBase().isFWhichTblStm())
{
name = STREAM_TABLE_1;
}
String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0;
// Grab the table stream.
if (!directory.hasEntry(name)) {
@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore {
}
// read in the table stream.
InputStream is = directory.createDocumentInputStream(name);
_tableStream = IOUtils.toByteArray(is);
is.close();
_tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE);
_fib.fillVariableFields(_mainStream, _tableStream);
// read in the data stream.
InputStream dis = null;
try {
DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA);
dis = directory.createDocumentInputStream(STREAM_DATA);
_dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
} catch(IOException e) {
_dataStream = new byte[0];
} finally {
if (dis != null) {
dis.close();
}
}
_dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0];
// Get the cp of the start of text in the main stream
// The latest spec doc says this is always zero!
@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
boolean preserveBinTables = false;
try {
preserveBinTables = Boolean.parseBoolean( System
.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
} catch ( Exception exc ) {
// ignore;
}
@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
boolean preserveTextTable = false;
try {
preserveTextTable = Boolean.parseBoolean( System
.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
} catch ( Exception exc ) {
// ignore;
}
@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
// initialize our streams for writing.
HWPFFileSystem docSys = new HWPFFileSystem();
HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
//HWPFOutputStream dataStream = docSys.getStream("Data");
int tableOffset = 0;
@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore {
// it after we write everything else.
byte[] placeHolder = new byte[fibSize];
wordDocumentStream.write(placeHolder);
int mainOffset = wordDocumentStream.getOffset();
int mainOffset = wordDocumentStream.size();
// write out the StyleSheet.
_fib.setFcStshf(tableOffset);
_ss.writeTo(tableStream);
_fib.setLcbStshf(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbStshf(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
// get fcMin and fcMac because we will be writing the actual text with the
// complex table.
@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the Complex table, includes text.
_fib.setFcClx(tableOffset);
_cft.writeTo(wordDocumentStream, tableStream);
_fib.setLcbClx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
int fcMac = wordDocumentStream.getOffset();
_fib.setLcbClx(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
int fcMac = wordDocumentStream.size();
/*
* dop (document properties record) Written immediately after the end of
@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the DocumentProperties.
_fib.setFcDop(tableOffset);
_dop.writeTo(tableStream);
_fib.setLcbDop(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbDop(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
/*
* plcfBkmkf (table recording beginning CPs of bookmarks) Written
@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writePlcfBkmkf( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
/*
@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writePlcfBkmkl( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
/*
@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the CHPBinTable.
_fib.setFcPlcfbteChpx(tableOffset);
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
/*
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the
@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the PAPBinTable.
_fib.setFcPlcfbtePapx(tableOffset);
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
/*
* plcfendRef (endnote reference position table) Written immediately
@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
_endnotesTables.writeRef( _fib, tableStream );
_endnotesTables.writeTxt( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
/*
* plcffld*** (table of field positions and statuses for annotation
@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _fieldsTables != null )
{
_fieldsTables.write( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
/*
@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/
_footnotesTables.writeRef( _fib, tableStream );
_footnotesTables.writeTxt( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
/*
* plcfsed (section table) Written immediately after the previously
@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the SectionTable.
_fib.setFcPlcfsed(tableOffset);
_st.writeTo(wordDocumentStream, tableStream);
_fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbPlcfsed(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
// write out the list tables
if ( _lt != null )
@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 25 of 210
*/
_lt.writeListDataTo( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
/*
* plflfo (more list formats) Written immediately after the end of
@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 26 of 210
*/
_lt.writeListOverridesTo( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
/*
@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null )
{
_bookmarksTables.writeSttbfBkmk( _fib, tableStream );
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
/*
@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
{
_fib.setFcSttbSavedBy(tableOffset);
_sbt.writeTo(tableStream);
_fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
_fib.setLcbSttbSavedBy(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
// write out the revision mark authors table.
@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore {
{
_fib.setFcSttbfRMark(tableOffset);
_rmat.writeTo(tableStream);
_fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
_fib.setLcbSttbfRMark(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset();
tableOffset = tableStream.size();
}
// write out the FontTable.
_fib.setFcSttbfffn(tableOffset);
_ft.writeTo(tableStream);
_fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
tableOffset = tableStream.getOffset();
_fib.setLcbSttbfffn(tableStream.size() - tableOffset);
tableOffset = tableStream.size();
// set some variables in the FileInformationBlock.
_fib.getFibBase().setFcMin(fcMin);
_fib.getFibBase().setFcMac(fcMac);
_fib.setCbMac(wordDocumentStream.getOffset());
_fib.setCbMac(wordDocumentStream.size());
// make sure that the table, doc and data streams use big blocks.
byte[] mainBuf = wordDocumentStream.toByteArray();

View File

@ -17,13 +17,19 @@
package org.apache.poi.hwpf;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDocument;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.FibBase;
import org.apache.poi.hwpf.model.FileInformationBlock;
import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables;
@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.usermodel.ObjectPoolImpl;
import org.apache.poi.hwpf.usermodel.ObjectsPool;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.crypt.ChunkedCipherInputStream;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
/**
* This class holds much of the core of a Word document, but
* without some of the table structure information.
* You generally want to work with one of
* {@link HWPFDocument} or {@link HWPFOldDocument}
* {@link HWPFDocument} or {@link HWPFOldDocument}
*/
public abstract class HWPFDocumentCore extends POIDocument
{
public abstract class HWPFDocumentCore extends POIDocument {
protected static final String STREAM_OBJECT_POOL = "ObjectPool";
protected static final String STREAM_WORD_DOCUMENT = "WordDocument";
protected static final String STREAM_TABLE_0 = "0Table";
protected static final String STREAM_TABLE_1 = "1Table";
/** Holds OLE2 objects */
protected ObjectPoolImpl _objectPool;
private static final int FIB_BASE_LEN = 68;
/** The FIB */
protected FileInformationBlock _fib;
/** Holds OLE2 objects */
protected ObjectPoolImpl _objectPool;
/** Holds styles for this document.*/
protected StyleSheet _ss;
/** The FIB */
protected FileInformationBlock _fib;
/** Contains formatting properties for text*/
protected CHPBinTable _cbt;
/** Holds styles for this document.*/
protected StyleSheet _ss;
/** Contains formatting properties for paragraphs*/
protected PAPBinTable _pbt;
/** Contains formatting properties for text*/
protected CHPBinTable _cbt;
/** Contains formatting properties for sections.*/
protected SectionTable _st;
/** Contains formatting properties for paragraphs*/
protected PAPBinTable _pbt;
/** Holds fonts for this document.*/
protected FontTable _ft;
/** Contains formatting properties for sections.*/
protected SectionTable _st;
/** Hold list tables */
protected ListTables _lt;
/** Holds fonts for this document.*/
protected FontTable _ft;
/** main document stream buffer*/
protected byte[] _mainStream;
/** Hold list tables */
protected ListTables _lt;
protected HWPFDocumentCore()
{
super((DirectoryNode)null);
}
/** main document stream buffer*/
protected byte[] _mainStream;
/**
* Takes an InputStream, verifies that it's not RTF or PDF, builds a
* POIFSFileSystem from it, and returns that.
*/
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes
PushbackInputStream pis = new PushbackInputStream(istream,6);
byte[] first6 = IOUtils.toByteArray(pis, 6);
private EncryptionInfo _encryptionInfo;
// Does it start with {\rtf ? If so, it's really RTF
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
&& first6[3] == 't' && first6[4] == 'f') {
throw new IllegalArgumentException("The document is really a RTF file");
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
throw new IllegalArgumentException("The document is really a PDF file");
}
protected HWPFDocumentCore() {
super((DirectoryNode)null);
}
// OK, so it's neither RTF nor PDF
// Open a POIFSFileSystem on the (pushed back) stream
pis.unread(first6);
return new POIFSFileSystem(pis);
}
/**
* Takes an InputStream, verifies that it's not RTF or PDF, builds a
* POIFSFileSystem from it, and returns that.
*/
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes
PushbackInputStream pis = new PushbackInputStream(istream,6);
byte[] first6 = IOUtils.toByteArray(pis, 6);
/**
* This constructor loads a Word document from an InputStream.
*
* @param istream The InputStream that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in InputStream.
*/
public HWPFDocumentCore(InputStream istream) throws IOException
{
//do Ole stuff
this( verifyAndBuildPOIFS(istream) );
}
// Does it start with {\rtf ? If so, it's really RTF
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
&& first6[3] == 't' && first6[4] == 'f') {
throw new IllegalArgumentException("The document is really a RTF file");
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
throw new IllegalArgumentException("The document is really a PDF file");
}
/**
* This constructor loads a Word document from a POIFSFileSystem
*
* @param pfilesystem The POIFSFileSystem that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException
{
this(pfilesystem.getRoot());
}
// OK, so it's neither RTF nor PDF
// Open a POIFSFileSystem on the (pushed back) stream
pis.unread(first6);
return new POIFSFileSystem(pis);
}
/**
* This constructor loads a Word document from a specific point
* in a POIFSFileSystem, probably not the default.
* Used typically to open embeded documents.
*
* @param directory The DirectoryNode that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
// Sort out the hpsf properties
super(directory);
/**
* This constructor loads a Word document from an InputStream.
*
* @param istream The InputStream that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in InputStream.
*/
public HWPFDocumentCore(InputStream istream) throws IOException {
//do Ole stuff
this( verifyAndBuildPOIFS(istream) );
}
// read in the main stream.
DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument");
DocumentInputStream dis = null;
try {
dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT);
_mainStream = IOUtils.toByteArray(dis, documentProps.getSize());
} finally {
if (dis != null) {
dis.close();
/**
* This constructor loads a Word document from a POIFSFileSystem
*
* @param pfilesystem The POIFSFileSystem that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
this(pfilesystem.getRoot());
}
/**
* This constructor loads a Word document from a specific point
* in a POIFSFileSystem, probably not the default.
* Used typically to open embeded documents.
*
* @param directory The DirectoryNode that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
// Sort out the hpsf properties
super(directory);
// read in the main stream.
_mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE);
_fib = new FileInformationBlock(_mainStream);
DirectoryEntry objectPoolEntry = null;
if (directory.hasEntry(STREAM_OBJECT_POOL)) {
objectPoolEntry = (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL);
}
_objectPool = new ObjectPoolImpl(objectPoolEntry);
}
/**
 * Looks up a named property set, handing the document's {@link EncryptionInfo}
 * to the superclass lookup when one is available.
 *
 * @param setName The property to read
 * @return The value of the given property or null if it wasn't found.
 * @throws RuntimeException wrapping the {@link IOException} raised while
 *         determining the encryption info
 */
@Override
protected PropertySet getPropertySet(String setName) {
    final EncryptionInfo encInfo;
    try {
        encInfo = getEncryptionInfo();
    } catch (IOException ioe) {
        // the overridden signature declares no checked exception, so rethrow unchecked
        throw new RuntimeException(ioe);
    }

    if (encInfo != null) {
        return super.getPropertySet(setName, encInfo);
    }
    return super.getPropertySet(setName);
}
/**
 * Determines the encryption settings of this document, verifies the password
 * and caches the result for subsequent calls.
 * <p>
 * The {@link FibBase} is parsed from the already loaded main stream or, if that
 * isn't available yet, from the first {@code FIB_BASE_LEN} bytes of the
 * {@code WordDocument} stream. If the FIB isn't flagged as encrypted, nothing is
 * cached and {@code null} is returned. Otherwise the first {@code lKey} bytes of
 * the table stream are read without decryption and parsed as the encryption header.
 * <p>
 * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()};
 * if none is set, {@link Decryptor#DEFAULT_PASSWORD} is tried.
 *
 * @return the verified encryption info, or {@code null} if the document isn't encrypted
 * @throws IOException if a stream can't be read or the password check fails with a
 *         {@link GeneralSecurityException}
 * @throws EncryptedDocumentException if the password doesn't match
 */
protected EncryptionInfo getEncryptionInfo() throws IOException {
    if (_encryptionInfo != null) {
        return _encryptionInfo;
    }

    // Create our FIB, and check for the doc being encrypted
    // (-1 = read the raw bytes, never try to decrypt the FIB base itself)
    byte[] fibBaseBytes = (_mainStream != null)
        ? _mainStream
        : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN);
    FibBase fibBase = new FibBase( fibBaseBytes, 0 );
    if (!fibBase.isFEncrypted()) {
        return null;
    }

    // the encryption header is stored unencrypted at the start of the table stream
    String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0;
    byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey());
    LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream);

    // only the obfuscation flag forces a mode, otherwise no preferred mode is passed
    EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null;
    EncryptionInfo ei = new EncryptionInfo(leis, em);
    Decryptor dec = ei.getDecryptor();
    dec.setChunkSize(512);

    try {
        String pass = Biff8EncryptionKey.getCurrentUserPassword();
        if (pass == null) {
            pass = Decryptor.DEFAULT_PASSWORD;
        }
        if (!dec.verifyPassword(pass)) {
            throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPassword() to set password before opening");
        }
    } catch (GeneralSecurityException e) {
        throw new IOException(e.getMessage(), e);
    }

    // cache only after the password has been verified successfully
    _encryptionInfo = ei;
    return ei;
}
/**
 * Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available,
 * decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try
 * to decrypt the bytes.
 * <p>
 * With an encryptionOffset greater than 0, the first encryptionOffset bytes are taken over
 * undecrypted and the remaining bytes are decrypted. The {@code len} limit is applied to the
 * bytes copied after that plain prefix.
 *
 * @param name the name of the stream
 * @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption
 * @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes
 * @return the read bytes
 * @throws IOException if the stream can't be found or read, or if setting up the
 *         decryption fails with a {@link GeneralSecurityException}
 */
protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException {
    DirectoryNode dir = getDirectory();
    DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name);
    int streamSize = documentProps.getSize();

    DocumentInputStream dis = dir.createDocumentInputStream(documentProps);
    // "is" is the stream the bytes are finally copied from - it may get wrapped below
    InputStream is = dis;
    try {
        // resolved inside the try block, so the document stream is also released
        // when determining the encryption info fails (e.g. because of a wrong password)
        EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null;
        ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len));

        if (ei != null) {
            try {
                Decryptor dec = ei.getDecryptor();
                is = dec.getDataStream(dis, streamSize, 0);
                if (encryptionOffset > 0) {
                    // copy the unencrypted prefix as-is while advancing the cipher stream
                    ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is;
                    byte plain[] = new byte[encryptionOffset];
                    cis.readPlain(plain, 0, encryptionOffset);
                    bos.write(plain);
                }
            } catch (GeneralSecurityException e) {
                throw new IOException(e.getMessage(), e);
            }
        }

        // This simplifies a few combinations, so we actually always try to copy len bytes
        // regardless if encryptionOffset is greater than 0
        if (len < Integer.MAX_VALUE) {
            is = new BoundedInputStream(is, len);
        }
        IOUtils.copy(is, bos);
        return bos.toByteArray();
    } finally {
        IOUtils.closeQuietly(is);
        IOUtils.closeQuietly(dis);
    }
}
// Create our FIB, and check for the doc being encrypted
_fib = new FileInformationBlock(_mainStream);
DirectoryEntry objectPoolEntry;
try {
objectPoolEntry = (DirectoryEntry) directory
.getEntry(STREAM_OBJECT_POOL);
} catch (FileNotFoundException exc) {
objectPoolEntry = null;
}
_objectPool = new ObjectPoolImpl(objectPoolEntry);
}
/**
/**
* Returns the range which covers the whole of the document, but excludes
* any headers and footers.
*/
@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument
@Internal
public abstract StringBuilder getText();
public CHPBinTable getCharacterTable()
{
return _cbt;
}
public CHPBinTable getCharacterTable() {
return _cbt;
}
public PAPBinTable getParagraphTable()
{
return _pbt;
}
public PAPBinTable getParagraphTable() {
return _pbt;
}
public SectionTable getSectionTable()
{
return _st;
}
public SectionTable getSectionTable() {
return _st;
}
public StyleSheet getStyleSheet()
{
return _ss;
}
public StyleSheet getStyleSheet() {
return _ss;
}
public ListTables getListTables()
{
return _lt;
}
public ListTables getListTables() {
return _lt;
}
public FontTable getFontTable()
{
return _ft;
}
public FontTable getFontTable() {
return _ft;
}
public FileInformationBlock getFileInformationBlock()
{
return _fib;
}
public FileInformationBlock getFileInformationBlock() {
return _fib;
}
public ObjectsPool getObjectsPool()
{
public ObjectsPool getObjectsPool() {
return _objectPool;
}
@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument
public byte[] getMainStream() {
return _mainStream;
}
}
}

View File

@ -0,0 +1,69 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.junit.AfterClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
/**
 * Opens password protected sample documents and checks the extracted text.
 * Each parameter set consists of the sample file name, its password and the expected text.
 */
@RunWith(Parameterized.class)
public class HWPFTestEncryption {
    /** Removes the password set by the test runs, so it isn't left behind for other tests. */
    @AfterClass
    public static void clearPass() {
        Biff8EncryptionKey.setCurrentUserPassword(null);
    }

    /** name of the sample file */
    @Parameter(value = 0)
    public String file;

    /** password used to open the sample file */
    @Parameter(value = 1)
    public String password;

    /** expected (trimmed) text of the document */
    @Parameter(value = 2)
    public String expected;

    @Parameters(name="{0}")
    public static Collection<String[]> data() {
        return Arrays.asList(
            new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." },
            new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" }
        );
    }

    @Test
    public void extract() throws IOException {
        Biff8EncryptionKey.setCurrentUserPassword(password);
        HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file);
        // close extractor and document even if the extraction or the assertion fails
        try {
            WordExtractor we = new WordExtractor(docD);
            try {
                String actual = we.getText().trim();
                assertEquals(expected, actual);
            } finally {
                we.close();
            }
        } finally {
            docD.close();
        }
    }
}

Binary file not shown.

Binary file not shown.