Strip trailing padding from HMEF compressed rtf when decoding

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1081414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-03-14 15:11:03 +00:00
parent d2d927df6a
commit 08488cf978
4 changed files with 56 additions and 11 deletions

View File

@ -31,9 +31,6 @@ import org.apache.poi.util.LittleEndian;
* Within a {@link HMEFMessage}, the content is often
* stored as RTF, but LZW compressed. This class
* handles decompressing it for you.
*
* Note - this doesn't quite decompress the data correctly,
* more work and unit testing is required...
*/
public final class CompressedRTF extends LZWDecompresser {
public static final byte[] COMPRESSED_SIGNATURE =
@ -52,6 +49,9 @@ public final class CompressedRTF extends LZWDecompresser {
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss " +
"\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" +
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
private int compressedSize;
private int decompressedSize;
public CompressedRTF() {
// Out flag has the normal meaning
@ -60,10 +60,18 @@ public final class CompressedRTF extends LZWDecompresser {
super(true, 2, true);
}
/**
* Decompresses the whole of the compressed RTF
* stream, outputting the resulting RTF bytes.
* Note - this will also decompress any padding at the
* end of the input, if present. Use
* {@link #getDeCompressedSize()} if you need to know
* how much of the result is real. (Padding may be up
* to 7 bytes).
*/
public void decompress(InputStream src, OutputStream res) throws IOException {
// Validate the header on the front of the RTF
int compressedSize = LittleEndian.readInt(src);
int uncompressedSize = LittleEndian.readInt(src);
compressedSize = LittleEndian.readInt(src);
decompressedSize = LittleEndian.readInt(src);
int compressionType = LittleEndian.readInt(src);
int dataCRC = LittleEndian.readInt(src);
@ -82,6 +90,21 @@ public final class CompressedRTF extends LZWDecompresser {
// Have it processed
super.decompress(src, res);
}
/**
 * Reports the size, in bytes, of the compressed payload that was
 * announced by the header of the most recently decompressed stream.
 * <p>
 * The size value stored in the header appears to also count the
 * 12 bytes of header fields which follow it, so those are removed
 * here. Only meaningful once a stream has been decompressed.
 *
 * @return the header's compressed size, minus 12
 */
public int getCompressedSize() {
    // Header bytes that are included in the stored size value
    final int countedHeaderBytes = 12;
    return this.compressedSize - countedHeaderBytes;
}
/**
 * Reports the size, in bytes, that the header of the most recently
 * decompressed stream declared for the decompressed data. Compare
 * this with the number of bytes actually produced to spot (and
 * drop) any trailing padding.
 *
 * @return the decompressed size as read from the stream header
 */
public int getDeCompressedSize() {
    return this.decompressedSize;
}
/**
* We use regular dictionary offsets, so no

View File

@ -37,9 +37,17 @@ public final class MAPIRtfAttribute extends MAPIAttribute {
/**
 * Creates the attribute from its raw bytes, decompressing the
 * compressed RTF they hold and dropping any trailing padding.
 *
 * @param property the MAPI property, passed through to the superclass
 * @param type the attribute type, passed through to the superclass
 * @param data the raw attribute bytes, holding the compressed RTF stream
 * @throws IOException if the data cannot be decompressed, or if its
 *         header declares a negative decompressed size
 */
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
    super(property, type, data);

    // Decompress once only; the result may carry trailing padding
    CompressedRTF rtf = new CompressedRTF();
    byte[] tmp = rtf.decompress(new ByteArrayInputStream(data));

    // The size comes straight from the stream header, so reject a
    // nonsensical value with a clear error rather than letting the
    // array allocation below fail
    int realSize = rtf.getDeCompressedSize();
    if (realSize < 0) {
        throw new IOException("Invalid decompressed RTF size in header: " + realSize);
    }

    // Remove any trailing padding as needed
    if (tmp.length > realSize) {
        this.decompressed = new byte[realSize];
        System.arraycopy(tmp, 0, this.decompressed, 0, realSize);
    } else {
        this.decompressed = tmp;
    }

    // Turn the RTF data into a more useful string
    this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
}

View File

@ -148,7 +148,7 @@ public final class TestCompressedRTF extends TestCase {
* Check that we can correctly decode the whole file
* TODO Fix what looks like a padding issue
*/
public void DISABLEDtestFull() throws Exception {
public void testFull() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);
@ -160,11 +160,26 @@ public final class TestCompressedRTF extends TestCase {
byte[] expected = IOUtils.toByteArray(
_samples.openResourceAsStream("quick-contents/message.rtf")
);
byte[] decomp = rtfAttr.getData();
CompressedRTF comp = new CompressedRTF();
byte[] data = rtfAttr.getRawData();
byte[] decomp = comp.decompress(new ByteArrayInputStream(data));
// Check the length was as expected
assertEquals(data.length, comp.getCompressedSize() + 16);
assertEquals(expected.length, comp.getDeCompressedSize());
// Will have been padded though
assertEquals(expected.length+2, decomp.length);
byte[] tmp = new byte[expected.length];
System.arraycopy(decomp, 0, tmp, 0, tmp.length);
decomp = tmp;
// By byte
assertEquals(expected.length, decomp.length);
assertEquals(expected, decomp);
for(int i=0; i<expected.length; i++) {
assertEquals(expected[i], decomp[i]);
}
// By String
String expString = new String(expected, "ASCII");

View File

@ -103,9 +103,8 @@ public final class TestHMEFMessage extends HMEFTest {
/**
* Checks that the compressed RTF message contents
* can be correctly extracted
* TODO Fix what looks like a padding issue
*/
public void DISABLEDtestMessageContents() throws Exception {
public void testMessageContents() throws Exception {
HMEFMessage msg = new HMEFMessage(
_samples.openResourceAsStream("quick-winmail.dat")
);