mirror of https://github.com/apache/poi.git
Fix Visio compression
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872223 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
07b5bc667c
commit
adb8424bc1
|
@ -23,184 +23,167 @@ import java.io.OutputStream;
|
|||
|
||||
/**
|
||||
* This class provides common functionality for the
|
||||
* various LZW implementations in the different file
|
||||
* formats.
|
||||
* various LZW implementations in the different file
|
||||
* formats.
|
||||
* It's currently used by HDGF and HMEF.
|
||||
*
|
||||
* <p>
|
||||
* Two good resources on LZW are:
|
||||
* http://en.wikipedia.org/wiki/LZW
|
||||
* http://marknelson.us/1989/10/01/lzw-data-compression/
|
||||
* http://en.wikipedia.org/wiki/LZW
|
||||
* http://marknelson.us/1989/10/01/lzw-data-compression/
|
||||
*/
|
||||
public abstract class LZWDecompresser {
|
||||
|
||||
//arbitrarily selected; may need to increase
|
||||
private static final int MAX_RECORD_LENGTH = 1_000_000;
|
||||
/** the size of our dictionary */
|
||||
public static final int DICT_SIZE = 0x1000;
|
||||
/** the mask for calculating / wrapping dictionary offsets */
|
||||
public static final int DICT_MASK = 0xFFF;
|
||||
|
||||
/**
|
||||
* Does the mask bit mean it's compressed or uncompressed?
|
||||
*/
|
||||
private final boolean maskMeansCompressed;
|
||||
/**
|
||||
* How much to append to the code length in the stream
|
||||
* to get the real code length? Normally 2 or 3
|
||||
*/
|
||||
private final int codeLengthIncrease;
|
||||
/**
|
||||
* Does the 12 bits of the position get stored in
|
||||
* Little Endian or Big Endian form?
|
||||
* This controls whether a pos+length of 0x12 0x34
|
||||
* becomes a position of 0x123 or 0x312
|
||||
*/
|
||||
private final boolean positionIsBigEndian;
|
||||
|
||||
protected LZWDecompresser(boolean maskMeansCompressed,
|
||||
int codeLengthIncrease, boolean positionIsBigEndian) {
|
||||
this.maskMeansCompressed = maskMeansCompressed;
|
||||
this.codeLengthIncrease = codeLengthIncrease;
|
||||
this.positionIsBigEndian = positionIsBigEndian;
|
||||
}
|
||||
|
||||
/**
|
||||
* Populates the dictionary, and returns where in it
|
||||
* to begin writing new codes.
|
||||
* Generally, if the dictionary is pre-populated, then new
|
||||
* codes should be placed at the end of that block.
|
||||
* Equally, if the dictionary is left with all zeros, then
|
||||
* usually the new codes can go in at the start.
|
||||
*/
|
||||
protected abstract int populateDictionary(byte[] dict);
|
||||
|
||||
/**
|
||||
* Adjusts the position offset if needed when looking
|
||||
* something up in the dictionary.
|
||||
*/
|
||||
protected abstract int adjustDictionaryOffset(int offset);
|
||||
|
||||
/**
|
||||
* Decompresses the given input stream, returning the array of bytes
|
||||
* of the decompressed input.
|
||||
*/
|
||||
public byte[] decompress(InputStream src) throws IOException {
|
||||
ByteArrayOutputStream res = new ByteArrayOutputStream();
|
||||
decompress(src,res);
|
||||
return res.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a streaming decompression of the input.
|
||||
* Works by:
|
||||
* 1) Reading a flag byte, the 8 bits of which tell you if the
|
||||
* following 8 codes are compressed or un-compressed
|
||||
* 2) Consider the 8 bits in turn
|
||||
* 3) If the bit is set, the next code is un-compressed, so
|
||||
* add it to the dictionary and output it
|
||||
* 4) If the bit isn't set, then read in the length and start
|
||||
* position in the dictionary, and output the bytes there
|
||||
* 5) Loop until we've done all 8 bits, then read in the next
|
||||
* flag byte
|
||||
*/
|
||||
public void decompress(InputStream src, OutputStream res) throws IOException {
|
||||
// How far through the output we've got
|
||||
// (This is normally used &4095, so it nicely wraps)
|
||||
// The initial value is set when populating the dictionary
|
||||
int pos;
|
||||
// The flag byte is treated as its 8 individual
|
||||
// bits, which tell us if the following 8 codes
|
||||
// are compressed or un-compressed
|
||||
int flag;
|
||||
// The mask, between 1 and 255, which is used when
|
||||
// processing each bit of the flag byte in turn
|
||||
int mask;
|
||||
//arbitrarily selected; may need to increase
|
||||
private static final int MAX_RECORD_LENGTH = 1_000_000;
|
||||
|
||||
// We use 12 bit codes:
|
||||
// * 0-255 are real bytes
|
||||
// * 256-4095 are the substring codes
|
||||
// Java handily initialises our buffer / dictionary
|
||||
// to all zeros
|
||||
byte[] buffer = new byte[4096];
|
||||
pos = populateDictionary(buffer);
|
||||
/**
|
||||
* Does the mask bit mean it's compressed or uncompressed?
|
||||
*/
|
||||
private final boolean maskMeansCompressed;
|
||||
/**
|
||||
* How much to append to the code length in the stream
|
||||
* to get the real code length? Normally 2 or 3
|
||||
*/
|
||||
private final int codeLengthIncrease;
|
||||
/**
|
||||
* Does the 12 bits of the position get stored in
|
||||
* Little Endian or Big Endian form?
|
||||
* This controls whether a pos+length of 0x12 0x34
|
||||
* becomes a position of 0x123 or 0x312
|
||||
*/
|
||||
private final boolean positionIsBigEndian;
|
||||
|
||||
// These are bytes as looked up in the dictionary
|
||||
// It needs to be signed, as it'll get passed on to
|
||||
// the output stream
|
||||
byte[] dataB = IOUtils.safelyAllocate(16+codeLengthIncrease, MAX_RECORD_LENGTH);
|
||||
// This is an unsigned byte read from the stream
|
||||
// It needs to be unsigned, so that bit stuff works
|
||||
int dataI;
|
||||
// The compressed code sequence is held over 2 bytes
|
||||
int dataIPt1, dataIPt2;
|
||||
// How long a code sequence is, and where in the
|
||||
// dictionary to start at
|
||||
int len, pntr;
|
||||
protected LZWDecompresser(boolean maskMeansCompressed,
|
||||
int codeLengthIncrease, boolean positionIsBigEndian) {
|
||||
this.maskMeansCompressed = maskMeansCompressed;
|
||||
this.codeLengthIncrease = codeLengthIncrease;
|
||||
this.positionIsBigEndian = positionIsBigEndian;
|
||||
}
|
||||
|
||||
while( (flag = src.read()) != -1 ) {
|
||||
// Compare each bit in our flag byte in turn:
|
||||
for(mask = 1; mask < 256 ; mask <<= 1) {
|
||||
// Is this a new code (un-compressed), or
|
||||
// the use of existing codes (compressed)?
|
||||
boolean isMaskSet = (flag & mask) > 0;
|
||||
if( isMaskSet ^ maskMeansCompressed ) {
|
||||
// Retrieve the un-compressed code
|
||||
if( (dataI = src.read()) != -1) {
|
||||
// Save the byte into the dictionary
|
||||
buffer[(pos&4095)] = fromInt(dataI);
|
||||
pos++;
|
||||
// And output the byte
|
||||
res.write( new byte[] {fromInt(dataI)} );
|
||||
}
|
||||
} else {
|
||||
// We have a compressed sequence
|
||||
// Grab the next 16 bits of data
|
||||
dataIPt1 = src.read();
|
||||
dataIPt2 = src.read();
|
||||
if(dataIPt1 == -1 || dataIPt2 == -1) break;
|
||||
/**
|
||||
* Populates the dictionary, and returns where in it
|
||||
* to begin writing new codes.
|
||||
* Generally, if the dictionary is pre-populated, then new
|
||||
* codes should be placed at the end of that block.
|
||||
* Equally, if the dictionary is left with all zeros, then
|
||||
* usually the new codes can go in at the start.
|
||||
*/
|
||||
protected abstract int populateDictionary(byte[] dict);
|
||||
|
||||
// Build up how long the code sequence is, and
|
||||
// what position of the code to start at
|
||||
// (The position is usually the first 12 bits,
|
||||
// and the length is usually the last 4 bits)
|
||||
len = (dataIPt2 & 15) + codeLengthIncrease;
|
||||
if(positionIsBigEndian) {
|
||||
pntr = (dataIPt1<<4) + (dataIPt2>>4);
|
||||
} else {
|
||||
pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
|
||||
}
|
||||
|
||||
// Adjust the pointer as needed
|
||||
pntr = adjustDictionaryOffset(pntr);
|
||||
/**
|
||||
* Adjusts the position offset if needed when looking
|
||||
* something up in the dictionary.
|
||||
*/
|
||||
protected abstract int adjustDictionaryOffset(int offset);
|
||||
|
||||
// Loop over the codes, outputting what they correspond to
|
||||
for(int i=0; i<len; i++) {
|
||||
dataB[i] = buffer[(pntr + i) & 4095];
|
||||
buffer[ (pos + i) & 4095 ] = dataB[i];
|
||||
}
|
||||
res.write(dataB, 0, len);
|
||||
/**
|
||||
* Decompresses the given input stream, returning the array of bytes
|
||||
* of the decompressed input.
|
||||
*/
|
||||
public byte[] decompress(InputStream src) throws IOException {
|
||||
ByteArrayOutputStream res = new ByteArrayOutputStream();
|
||||
decompress(src, res);
|
||||
return res.toByteArray();
|
||||
}
|
||||
|
||||
// Record how far along the stream we have moved
|
||||
pos = pos + len;
|
||||
/**
|
||||
* Perform a streaming decompression of the input.
|
||||
* Works by:
|
||||
* 1) Reading a flag byte, the 8 bits of which tell you if the
|
||||
* following 8 codes are compressed our un-compressed
|
||||
* 2) Consider the 8 bits in turn
|
||||
* 3) If the bit is set, the next code is un-compressed, so
|
||||
* add it to the dictionary and output it
|
||||
* 4) If the bit isn't set, then read in the length and start
|
||||
* position in the dictionary, and output the bytes there
|
||||
* 5) Loop until we've done all 8 bits, then read in the next
|
||||
* flag byte
|
||||
*/
|
||||
public void decompress(InputStream src, OutputStream res) throws IOException {
|
||||
// How far through the output we've got
|
||||
// (This is normally used &4095, so it nicely wraps)
|
||||
// The initial value is set when populating the dictionary
|
||||
int pos;
|
||||
// The flag byte is treated as its 8 individual
|
||||
// bits, which tell us if the following 8 codes
|
||||
// are compressed or un-compressed
|
||||
int flag;
|
||||
// The mask, between 1 and 255, which is used when
|
||||
// processing each bit of the flag byte in turn
|
||||
int mask;
|
||||
|
||||
// We use 12 bit codes:
|
||||
// * 0-255 are real bytes
|
||||
// * 256-4095 are the substring codes
|
||||
// Java handily initialises our buffer / dictionary
|
||||
// to all zeros
|
||||
final byte[] buffer = new byte[DICT_SIZE];
|
||||
pos = populateDictionary(buffer);
|
||||
|
||||
// These are bytes as looked up in the dictionary
|
||||
// It needs to be signed, as it'll get passed on to
|
||||
// the output stream
|
||||
final byte[] dataB = IOUtils.safelyAllocate(16 + codeLengthIncrease, MAX_RECORD_LENGTH);
|
||||
// This is an unsigned byte read from the stream
|
||||
// It needs to be unsigned, so that bit stuff works
|
||||
int dataI;
|
||||
// The compressed code sequence is held over 2 bytes
|
||||
int dataIPt1, dataIPt2;
|
||||
// How long a code sequence is, and where in the
|
||||
// dictionary to start at
|
||||
int len, pntr;
|
||||
|
||||
while ((flag = src.read()) != -1) {
|
||||
// Compare each bit in our flag byte in turn:
|
||||
for (mask = 1; mask < 0x100; mask <<= 1) {
|
||||
// Is this a new code (un-compressed), or
|
||||
// the use of existing codes (compressed)?
|
||||
boolean isMaskSet = (flag & mask) > 0;
|
||||
if (isMaskSet ^ maskMeansCompressed) {
|
||||
// Retrieve the un-compressed code
|
||||
if ((dataI = src.read()) != -1) {
|
||||
// Save the byte into the dictionary
|
||||
buffer[pos++ & DICT_MASK] = (byte) dataI;
|
||||
// And output the byte
|
||||
res.write(dataI);
|
||||
}
|
||||
} else {
|
||||
// We have a compressed sequence
|
||||
// Grab the next 16 bits of data
|
||||
dataIPt1 = src.read();
|
||||
dataIPt2 = src.read();
|
||||
if (dataIPt1 == -1 || dataIPt2 == -1) break;
|
||||
|
||||
// Build up how long the code sequence is, and
|
||||
// what position of the code to start at
|
||||
// (The position is the usually the first 12 bits,
|
||||
// and the length is usually the last 4 bits)
|
||||
len = (dataIPt2 & 0x0F) + codeLengthIncrease;
|
||||
if (positionIsBigEndian) {
|
||||
pntr = (dataIPt1 << 4) + (dataIPt2 >>> 4);
|
||||
} else {
|
||||
pntr = dataIPt1 + ((dataIPt2 & 0xF0) << 4);
|
||||
}
|
||||
|
||||
// Adjust the pointer as needed
|
||||
pntr = adjustDictionaryOffset(pntr);
|
||||
|
||||
// Loop over the codes, outputting what they correspond to
|
||||
for (int i = 0; i < len; i++) {
|
||||
dataB[i] = buffer[(pntr + i) & DICT_MASK];
|
||||
buffer[(pos + i) & DICT_MASK] = dataB[i];
|
||||
}
|
||||
res.write(dataB, 0, len);
|
||||
|
||||
// Record how far along the stream we have moved
|
||||
pos += len;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an integer, turn it into a java byte, handling
|
||||
* the wrapping.
|
||||
* This is a convenience method
|
||||
*/
|
||||
public static byte fromInt(int b) {
|
||||
if(b < 128) return (byte)b;
|
||||
return (byte)(b - 256);
|
||||
}
|
||||
/**
|
||||
* Given a java byte, turn it into an integer between 0
|
||||
* and 255 (i.e. handle the unwrapping).
|
||||
* This is a convenience method
|
||||
*/
|
||||
public static int fromByte(byte b) {
|
||||
if(b >= 0) {
|
||||
return b;
|
||||
}
|
||||
return b + 256;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,7 +70,7 @@ public class HDGFLZW extends LZWDecompresser {
|
|||
}
|
||||
return pntr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* We want an empty dictionary, so do nothing
|
||||
*/
|
||||
|
@ -89,7 +89,7 @@ public class HDGFLZW extends LZWDecompresser {
|
|||
* or the OutputStream can't be written to
|
||||
*/
|
||||
public void compress(InputStream src, OutputStream res) throws IOException {
|
||||
HDGFLZWCompressor c = new HDGFLZWCompressor();
|
||||
c.compress(src, res);
|
||||
HDGFLZWCompressor c = new HDGFLZWCompressor(res);
|
||||
c.compress(src);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,242 +17,227 @@
|
|||
|
||||
package org.apache.poi.hdgf;
|
||||
|
||||
import static org.apache.poi.util.LZWDecompresser.DICT_MASK;
|
||||
import static org.apache.poi.util.LZWDecompresser.DICT_SIZE;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Helper class to handle the Visio compatible
|
||||
* streaming LZW compression.
|
||||
* Need our own class to handle keeping track of the
|
||||
* code buffer, pending bytes to write out etc.
|
||||
*
|
||||
* TODO Fix this, as it starts to go wrong on
|
||||
* large streams
|
||||
* Helper class to handle the Visio compatible streaming LZW compression.
|
||||
* Need our own class to handle keeping track of the code buffer, pending bytes to write out etc.
|
||||
* <p>
|
||||
* TODO Fix this, as it starts to go wrong on large streams
|
||||
*/
|
||||
/* package */ final class HDGFLZWCompressor {
|
||||
// We use 12 bit codes:
|
||||
// * 0-255 are real bytes
|
||||
// * 256-4095 are the substring codes
|
||||
// Java handily initialises our buffer / dictionary
|
||||
// to all zeros
|
||||
private byte[] dict = new byte[4096];
|
||||
// We use 12 bit codes:
|
||||
// * 0-255 are real bytes
|
||||
// * 256-4095 are the substring codes
|
||||
// Java handily initialises our buffer / dictionary
|
||||
// to all zeros
|
||||
private final byte[] dict = new byte[DICT_SIZE];
|
||||
|
||||
// The next block of data to be written out, minus
|
||||
// its mask byte
|
||||
private byte[] buffer = new byte[16];
|
||||
// And how long it is
|
||||
// (Un-compressed codes are 1 byte each, compressed codes
|
||||
// are two)
|
||||
private int bufferLen;
|
||||
// The next block of data to be written out, minus its mask byte
|
||||
private final byte[] buffer = new byte[16];
|
||||
// And how long it is
|
||||
// (Un-compressed codes are 1 byte each, compressed codes are two)
|
||||
private int bufferLen;
|
||||
|
||||
// The raw length of a code is limited to 4 bits + 2
|
||||
private byte[] rawCode = new byte[18];
|
||||
// And how much we're using
|
||||
private int rawCodeLen;
|
||||
// The raw length of a code is limited to 4 bits + 2
|
||||
private final byte[] rawCode = new byte[18];
|
||||
// And how much we're using
|
||||
private int rawCodeLen;
|
||||
|
||||
// How far through the input and output streams we are
|
||||
private int posInp;
|
||||
private int posOut;
|
||||
// How far through the input and output streams we are
|
||||
private int posInp;
|
||||
private int posOut;
|
||||
|
||||
// What the next mask byte to output will be
|
||||
private int nextMask;
|
||||
// And how many bits we've already set
|
||||
private int maskBitsSet;
|
||||
// What the next mask byte to output will be
|
||||
private int nextMask;
|
||||
// And how many bits we've already set
|
||||
private int maskBitsSet;
|
||||
|
||||
public HDGFLZWCompressor() {}
|
||||
|
||||
/**
|
||||
* Returns the last place that the bytes from rawCode are found
|
||||
* at in the buffer, or -1 if they can't be found
|
||||
*/
|
||||
private int findRawCodeInBuffer() {
|
||||
// Work our way through all the codes until we
|
||||
// find the right one. Visio starts from the end
|
||||
for(int i=4096-rawCodeLen; i>0; i--) {
|
||||
boolean matches = true;
|
||||
for(int j=0; matches && j<rawCodeLen; j++) {
|
||||
if(dict[i+j] == rawCode[j]) {
|
||||
// Fits
|
||||
} else {
|
||||
// Doesn't fit, can't be a match
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
private final OutputStream res;
|
||||
|
||||
// Was this position a match?
|
||||
if(matches) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
public HDGFLZWCompressor(OutputStream res) {
|
||||
this.res = res;
|
||||
}
|
||||
|
||||
// Not found
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Output the compressed representation for the bytes
|
||||
* found in rawCode
|
||||
*/
|
||||
private void outputCompressed(OutputStream res) throws IOException {
|
||||
// It's not worth compressing only 1 or two bytes,
|
||||
// due to the overheads
|
||||
// So if asked, just output uncompressed
|
||||
if(rawCodeLen < 3) {
|
||||
for(int i=0; i<rawCodeLen; i++) {
|
||||
outputUncompressed(rawCode[i], res);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab where the data lives
|
||||
int codesAt = findRawCodeInBuffer();
|
||||
codesAt -= 18;
|
||||
if(codesAt < 0) {
|
||||
codesAt += 4096;
|
||||
}
|
||||
|
||||
// Increment the mask bit count, we've done another code
|
||||
maskBitsSet++;
|
||||
|
||||
// Add the length+code to the buffer
|
||||
// (The position is the first 12 bits, the
|
||||
// length is the last 4 bits)
|
||||
int bp1 = (codesAt & 255);
|
||||
int bp2 = (rawCodeLen-3) + ((codesAt-bp1) >> 4);
|
||||
buffer[bufferLen] = HDGFLZW.fromInt(bp1);
|
||||
bufferLen++;
|
||||
buffer[bufferLen] = HDGFLZW.fromInt(bp2);
|
||||
bufferLen++;
|
||||
|
||||
// Copy the data to the dictionary in the new place
|
||||
for(int i=0; i<rawCodeLen; i++) {
|
||||
dict[(posOut&4095)] = rawCode[i];
|
||||
posOut++;
|
||||
}
|
||||
|
||||
// If we're now at 8 codes, output
|
||||
if(maskBitsSet == 8) {
|
||||
output8Codes(res);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Output the un-compressed byte
|
||||
*/
|
||||
private void outputUncompressed(byte b, OutputStream res) throws IOException {
|
||||
// Set the mask bit for us
|
||||
nextMask += (1<<maskBitsSet);
|
||||
maskBitsSet++;
|
||||
|
||||
// And add us to the buffer + dictionary
|
||||
buffer[bufferLen] = b;
|
||||
bufferLen++;
|
||||
dict[(posOut&4095)] = b;
|
||||
posOut++;
|
||||
|
||||
// If we're now at 8 codes, output
|
||||
if(maskBitsSet == 8) {
|
||||
output8Codes(res);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* We've got 8 code worth to write out, so
|
||||
* output along with the header
|
||||
*/
|
||||
private void output8Codes(OutputStream res) throws IOException {
|
||||
// Output the mask and the data
|
||||
res.write(new byte[] { HDGFLZW.fromInt(nextMask) } );
|
||||
res.write(buffer, 0, bufferLen);
|
||||
|
||||
// Reset things
|
||||
nextMask = 0;
|
||||
maskBitsSet = 0;
|
||||
bufferLen = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the compression
|
||||
*/
|
||||
public void compress(InputStream src, OutputStream res) throws IOException {
|
||||
// Have we hit the end of the file yet?
|
||||
boolean going = true;
|
||||
|
||||
// This is a byte as looked up in the dictionary
|
||||
// It needs to be signed, as it'll get passed on to
|
||||
// the output stream
|
||||
byte dataB;
|
||||
// This is an unsigned byte read from the stream
|
||||
// It needs to be unsigned, so that bit stuff works
|
||||
int dataI;
|
||||
|
||||
while( going ) {
|
||||
dataI = src.read();
|
||||
posInp++;
|
||||
if(dataI == -1) { going = false; }
|
||||
dataB = HDGFLZW.fromInt(dataI);
|
||||
|
||||
// If we've run out of data, output anything that's
|
||||
// pending then finish
|
||||
if(!going) {
|
||||
if(rawCodeLen > 0) {
|
||||
outputCompressed(res);
|
||||
if(maskBitsSet > 0) {
|
||||
output8Codes(res);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Try adding this new byte onto rawCode, and
|
||||
// see if all of that is still found in the
|
||||
// buffer dictionary or not
|
||||
rawCode[rawCodeLen] = dataB;
|
||||
rawCodeLen++;
|
||||
int rawAt = findRawCodeInBuffer();
|
||||
|
||||
// If we found it and are now at 18 bytes,
|
||||
// we need to output our pending code block
|
||||
if(rawCodeLen == 18 && rawAt > -1) {
|
||||
outputCompressed(res);
|
||||
rawCodeLen = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we did find all of rawCode with our new
|
||||
// byte added on, we can wait to see what happens
|
||||
// with the next byte
|
||||
if(rawAt > -1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we get here, then the rawCode + this byte weren't
|
||||
// found in the dictionary
|
||||
|
||||
// If there was something in rawCode before, then that was
|
||||
// found in the dictionary, so output that compressed
|
||||
rawCodeLen--;
|
||||
if(rawCodeLen > 0) {
|
||||
// Output the old rawCode
|
||||
outputCompressed(res);
|
||||
|
||||
// Can this byte start a new rawCode, or does
|
||||
// it need outputting itself?
|
||||
rawCode[0] = dataB;
|
||||
rawCodeLen = 1;
|
||||
if(findRawCodeInBuffer() > -1) {
|
||||
// Fits in, wait for next byte
|
||||
continue;
|
||||
}
|
||||
// Doesn't fit, output
|
||||
outputUncompressed(dataB,res);
|
||||
rawCodeLen = 0;
|
||||
} else {
|
||||
// Nothing in rawCode before, so this byte
|
||||
// isn't in the buffer dictionary
|
||||
// Output it un-compressed
|
||||
outputUncompressed(dataB,res);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Returns the last place that the bytes from rawCode are found
|
||||
* at in the buffer, or -1 if they can't be found
|
||||
*/
|
||||
private int findRawCodeInBuffer() {
|
||||
// Work our way through all the codes until we
|
||||
// find the right one. Visio starts from the end
|
||||
for (int i = rawCodeLen+1; i < DICT_SIZE; i++) {
|
||||
int pos = (posInp - i) & DICT_MASK;
|
||||
// in the example data it seems, that the compressor doesn't like to wrap beyond DICT_SIZE
|
||||
// if (pos + rawCodeLen > DICT_SIZE) continue;
|
||||
boolean matches = true;
|
||||
for (int j = 0; j < rawCodeLen; j++) {
|
||||
if (dict[(pos + j) & DICT_MASK] != rawCode[j]) {
|
||||
// Doesn't fit, can't be a match
|
||||
matches = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Was this position a match?
|
||||
if (matches) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
|
||||
// Not found
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Output the compressed representation for the bytes
|
||||
* found in rawCode
|
||||
*/
|
||||
private void outputCompressed() throws IOException {
|
||||
// It's not worth compressing only 1 or two bytes, due to the overheads
|
||||
// So if asked, just output uncompressed
|
||||
if (rawCodeLen < 3) {
|
||||
final int rcl = rawCodeLen;
|
||||
for (int i = 0; i < rcl; i++) {
|
||||
outputUncompressed(rawCode[i]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab where the data lives
|
||||
int codesAt = findRawCodeInBuffer();
|
||||
codesAt = (codesAt-18) & DICT_MASK;
|
||||
|
||||
// Increment the mask bit count, we've done another code
|
||||
maskBitsSet++;
|
||||
|
||||
// Add the length+code to the buffer
|
||||
// (The position is the first 12 bits, the length is the last 4 bits)
|
||||
int bp1 = (codesAt & 0xFF);
|
||||
int bp2 = (rawCodeLen - 3) + ((codesAt - bp1) >>> 4);
|
||||
buffer[bufferLen++] = (byte) bp1;
|
||||
buffer[bufferLen++] = (byte) bp2;
|
||||
|
||||
assert(maskBitsSet <= 8);
|
||||
|
||||
// If we're now at 8 codes, output
|
||||
if (maskBitsSet == 8) {
|
||||
output8Codes();
|
||||
}
|
||||
|
||||
rawCodeLen = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Output the un-compressed byte
|
||||
*/
|
||||
private void outputUncompressed(byte b) throws IOException {
|
||||
// Set the mask bit for us
|
||||
nextMask += (1 << maskBitsSet);
|
||||
maskBitsSet++;
|
||||
|
||||
// And add us to the buffer + dictionary
|
||||
buffer[bufferLen++] = b;
|
||||
|
||||
// If we're now at 8 codes, output
|
||||
if (maskBitsSet == 8) {
|
||||
output8Codes();
|
||||
}
|
||||
|
||||
rawCodeLen = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* We've got 8 code worth to write out, so
|
||||
* output along with the header
|
||||
*/
|
||||
private void output8Codes() throws IOException {
|
||||
// Output the mask and the data
|
||||
res.write(nextMask);
|
||||
res.write(buffer, 0, bufferLen);
|
||||
posOut += 1 + bufferLen;
|
||||
|
||||
// Reset things
|
||||
nextMask = 0;
|
||||
maskBitsSet = 0;
|
||||
bufferLen = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the compression
|
||||
*/
|
||||
public void compress(InputStream src) throws IOException {
|
||||
int dataI = -1;
|
||||
while (true) {
|
||||
if (dataI > -1) {
|
||||
// copy the last read byte into the dictionary.
|
||||
// the example data compressor used self references, so we don't wait for filling the dictionary
|
||||
// until we know if it's a un-/compressed token.
|
||||
dict[(posInp++) & DICT_MASK] = (byte)dataI;
|
||||
}
|
||||
// This is an unsigned byte read from the stream
|
||||
// It needs to be unsigned, so that bit stuff works
|
||||
dataI = src.read();
|
||||
|
||||
// If we've run out of data, output anything that's pending then finish
|
||||
if (dataI == -1) {
|
||||
if (rawCodeLen > 0) {
|
||||
outputCompressed();
|
||||
if (maskBitsSet > 0) {
|
||||
output8Codes();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// This is a byte as looked up in the dictionary
|
||||
// It needs to be signed, as it'll get passed on to the output stream
|
||||
byte dataB = (byte) dataI;
|
||||
|
||||
// Try adding this new byte onto rawCode, and see if all of that is still found
|
||||
// in the buffer dictionary or not
|
||||
rawCode[rawCodeLen++] = dataB;
|
||||
int rawAt = findRawCodeInBuffer();
|
||||
|
||||
if (rawAt > -1) {
|
||||
// If we found it and are now at 18 bytes, we need to output our pending code block
|
||||
if (rawCodeLen == 18) {
|
||||
outputCompressed();
|
||||
}
|
||||
|
||||
// If we did find all of rawCode with our new byte added on,
|
||||
// we can wait to see what happens with the next byte
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we get here, then the rawCode + this byte weren't found in the dictionary
|
||||
|
||||
// If there was something in rawCode before, then that was
|
||||
// found in the dictionary, so output that compressed
|
||||
rawCodeLen--;
|
||||
if (rawCodeLen > 0) {
|
||||
// Output the old rawCode
|
||||
outputCompressed();
|
||||
|
||||
// Can this byte start a new rawCode, or does it need outputting itself?
|
||||
rawCode[0] = dataB;
|
||||
rawCodeLen = 1;
|
||||
if (findRawCodeInBuffer() > -1) {
|
||||
// Fits in, wait for next byte
|
||||
continue;
|
||||
}
|
||||
// Doesn't fit, output
|
||||
outputUncompressed(dataB);
|
||||
} else {
|
||||
// Nothing in rawCode before, so this byte isn't in the buffer dictionary
|
||||
// Output it un-compressed
|
||||
outputUncompressed(dataB);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,139 +17,112 @@
|
|||
|
||||
package org.apache.poi.hdgf;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
public final class TestHDGFLZW {
|
||||
public static final byte[] testTrailerComp = {
|
||||
123, // *mask bit*
|
||||
-60, 2,
|
||||
-21, -16, // 3 @ 4093
|
||||
1, 0, 0, -72,
|
||||
-13, -16, // 3 @ 5
|
||||
78, // *mask bit* 2,3,4,7
|
||||
-32, -5, // 14 @ 4082
|
||||
1, 0, 3,
|
||||
-21, -16, // 3 @ 4093
|
||||
10, 5, // 8 @ 28
|
||||
4,
|
||||
-21, -16, // 3 @ 4093
|
||||
21, // *mask bit* 1,3,5
|
||||
9,
|
||||
-21, -16, // 3 @ 4093
|
||||
103,
|
||||
-21, -16, // 3 @ 4093
|
||||
34,
|
||||
-36, -1, // 18 @ 4078
|
||||
52, 15, // 18 @ 70
|
||||
70, 15, // 18 @ 88
|
||||
120, // *mask bit*
|
||||
88, 15, // 18 @ 106
|
||||
-7, -2, // 17 @ 11
|
||||
-28, -9, // 10 @ 4086
|
||||
-123, 21, 0, 44,
|
||||
-122, 1, // 4 @ 152
|
||||
-4, // *mask bit*
|
||||
104, 15, // 18 @ 122
|
||||
-24, -13, 40, -98, 32,
|
||||
78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
|
||||
-85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
|
||||
-34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
|
||||
-12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
|
||||
-21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
|
||||
-43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
|
||||
-9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
|
||||
-16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
|
||||
17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
|
||||
-21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
|
||||
0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
|
||||
125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
|
||||
-21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
|
||||
17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
|
||||
85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
|
||||
-16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
|
||||
1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
|
||||
85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
|
||||
85, 1, 102, -119, 72, 37, 0, 97, 33 };
|
||||
public static final byte[] testTrailerDecomp = {
|
||||
-60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
|
||||
0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
|
||||
-2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
|
||||
123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
|
||||
64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
|
||||
1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
|
||||
-44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
|
||||
0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
|
||||
-106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
|
||||
50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
|
||||
36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
|
||||
-4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
|
||||
79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
|
||||
1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
|
||||
1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
|
||||
0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
|
||||
0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
|
||||
0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
|
||||
0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
|
||||
};
|
||||
public static final byte[] testTrailerComp = {
|
||||
123, // *mask bit* 1,2,4-7
|
||||
-60, 2,
|
||||
-21, -16, // 3 @ 4093
|
||||
1, 0, 0, -72,
|
||||
-13, -16, // 3 @ 5
|
||||
78, // *mask bit* 2,3,4,7
|
||||
-32, -5, // 14 @ 4082
|
||||
1, 0, 3,
|
||||
-21, -16, // 3 @ 4093
|
||||
10, 5, // 8 @ 28
|
||||
4,
|
||||
-21, -16, // 3 @ 4093
|
||||
21, // *mask bit* 1,3,5
|
||||
9,
|
||||
-21, -16, // 3 @ 4093
|
||||
103,
|
||||
-21, -16, // 3 @ 4093
|
||||
34,
|
||||
-36, -1, // 18 @ 4078
|
||||
52, 15, // 18 @ 70
|
||||
70, 15, // 18 @ 88
|
||||
120, // *mask bit*
|
||||
88, 15, // 18 @ 106
|
||||
-7, -2, // 17 @ 11
|
||||
-28, -9, // 10 @ 4086
|
||||
-123, 21, 0, 44,
|
||||
-122, 1, // 4 @ 152
|
||||
-4, // *mask bit*
|
||||
104, 15, // 18 @ 122
|
||||
-24, -13, 40, -98, 32,
|
||||
78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
|
||||
-85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
|
||||
-34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
|
||||
-12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
|
||||
-21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
|
||||
-43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
|
||||
-9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
|
||||
-16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
|
||||
17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
|
||||
-21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
|
||||
0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
|
||||
125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
|
||||
-21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
|
||||
17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
|
||||
85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
|
||||
-16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
|
||||
1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
|
||||
85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
|
||||
85, 1, 102, -119, 72, 37, 0, 97, 33};
|
||||
public static final byte[] testTrailerDecomp = {
|
||||
-60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
|
||||
0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
-123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
|
||||
-2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
|
||||
123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
|
||||
64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
|
||||
1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
|
||||
-44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
|
||||
0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
|
||||
-106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
|
||||
50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
|
||||
36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
|
||||
-4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
|
||||
79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
|
||||
1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
|
||||
1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
|
||||
0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
|
||||
0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
|
||||
0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
|
||||
0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
@Test
|
||||
public void testFromToInt() {
|
||||
byte b255 = -1;
|
||||
assertEquals(255, HDGFLZW.fromByte(b255));
|
||||
assertEquals(-1, HDGFLZW.fromInt( HDGFLZW.fromByte(b255) ));
|
||||
assertEquals(-1, HDGFLZW.fromInt( 255 ));
|
||||
@Test
|
||||
public void testCounts() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
|
||||
byte b11 = 11;
|
||||
assertEquals(11, HDGFLZW.fromByte(b11));
|
||||
assertEquals(11, HDGFLZW.fromInt( HDGFLZW.fromByte(b11) ));
|
||||
assertEquals(11, HDGFLZW.fromInt( 11 ));
|
||||
// decompress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
|
||||
|
||||
byte b0 = 0;
|
||||
assertEquals(0, HDGFLZW.fromByte(b0));
|
||||
assertEquals(0, HDGFLZW.fromInt( HDGFLZW.fromByte(b0) ));
|
||||
assertEquals(0, HDGFLZW.fromInt( 0 ));
|
||||
|
||||
byte b127 = 127;
|
||||
assertEquals(127, HDGFLZW.fromByte(b127));
|
||||
assertEquals(127, HDGFLZW.fromInt( HDGFLZW.fromByte(b127) ));
|
||||
assertEquals(127, HDGFLZW.fromInt( 127 ));
|
||||
|
||||
byte b128 = -128;
|
||||
assertEquals(128, HDGFLZW.fromByte(b128));
|
||||
assertEquals(-128, HDGFLZW.fromInt( HDGFLZW.fromByte(b128) ));
|
||||
assertEquals(-128, HDGFLZW.fromInt( 128 ));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCounts() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
|
||||
// decompress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
|
||||
|
||||
// Check it's of the right size
|
||||
assertEquals(632, dec.length);
|
||||
// Check it's of the right size
|
||||
assertEquals(632, dec.length);
|
||||
|
||||
/*
|
||||
// Encode it again using our engine
|
||||
|
@ -158,121 +131,89 @@ public final class TestHDGFLZW {
|
|||
// Check it's of the right size
|
||||
assertEquals(339, comp.length);
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDecompress() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
@Test
|
||||
public void testDecompress() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
|
||||
// decompress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
|
||||
// decompress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
|
||||
|
||||
// Now check it's the right data
|
||||
assertEquals(632, dec.length);
|
||||
for(int i=0; i<dec.length; i++) {
|
||||
if(dec[i] != testTrailerDecomp[i])
|
||||
System.err.println(i + "\t" + dec[i] + "\t" + testTrailerDecomp[i]);
|
||||
}
|
||||
}
|
||||
// Now check it's the right data
|
||||
assertArrayEquals(testTrailerDecomp, dec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we can round-trip a little bit.
|
||||
* Uses a part short enough that we agree with visio
|
||||
* on the best way to compress it
|
||||
*/
|
||||
@Test
|
||||
public void testCompressMini() throws Exception {
|
||||
// first 11 bytes compressed = 12 bytes uncompressed
|
||||
byte[] sourceComp = new byte[11];
|
||||
byte[] sourceDecomp = new byte[12];
|
||||
System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
|
||||
System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
|
||||
/**
|
||||
* Test that we can round-trip a little bit.
|
||||
* Uses a part short enough that we agree with visio
|
||||
* on the best way to compress it
|
||||
*/
|
||||
@Test
|
||||
public void testCompressMini() throws Exception {
|
||||
// first 11 bytes compressed = 12 bytes uncompressed
|
||||
byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 12);
|
||||
|
||||
// Compress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
|
||||
// Compress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
|
||||
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
|
||||
// First up, check the round tripping
|
||||
// First up, check the round tripping
|
||||
assertEquals(12, decomp.length);
|
||||
for(int i=0; i<decomp.length; i++) {
|
||||
assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
|
||||
}
|
||||
assertArrayEquals(Arrays.copyOfRange(testTrailerDecomp, 0, decomp.length), decomp);
|
||||
|
||||
// Now check the compressed intermediate version
|
||||
assertEquals(11, comp.length);
|
||||
for(int i=0; i<comp.length; i++) {
|
||||
assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
|
||||
}
|
||||
}
|
||||
// Now check the compressed intermediate version
|
||||
assertEquals(11, comp.length);
|
||||
assertArrayEquals(Arrays.copyOfRange(testTrailerComp, 0, comp.length), comp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that we can do several mask pages
|
||||
*/
|
||||
@Test
|
||||
public void testCompressMidi() throws Exception {
|
||||
// First 12 -> 11
|
||||
// Next 32 -> 13
|
||||
byte[] sourceComp = new byte[24];
|
||||
byte[] sourceDecomp = new byte[44];
|
||||
System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
|
||||
System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
|
||||
/**
|
||||
* Tests that we can do several mask pages
|
||||
*/
|
||||
@Test
|
||||
public void testCompressMidi() throws Exception {
|
||||
// First 12 -> 11
|
||||
// Next 32 -> 13
|
||||
byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 44);
|
||||
|
||||
// Compress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
|
||||
// Compress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
|
||||
|
||||
// We should be 3 characters bigger, as
|
||||
// we split one compressed bit into two
|
||||
assertEquals(27, comp.length);
|
||||
assertEquals(24, comp.length);
|
||||
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
|
||||
// We can only check the round-tripping, as for now
|
||||
// visio cheats on re-using a block
|
||||
assertEquals(44, decomp.length);
|
||||
for(int i=0; i<decomp.length; i++) {
|
||||
assertEquals("Wrong at " + i, decomp[i], sourceDecomp[i]);
|
||||
}
|
||||
}
|
||||
// We can only check the round-tripping, as for now
|
||||
// visio cheats on re-using a block
|
||||
assertArrayEquals(sourceDecomp, decomp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets 160 bytes through then starts going wrong...
|
||||
* TODO Fix this
|
||||
*/
|
||||
@Test
|
||||
@Ignore
|
||||
public void testCompressFull() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
@Test
|
||||
public void testCompressFull() throws Exception {
|
||||
assertEquals(339, testTrailerComp.length);
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
|
||||
// Compress it using our engine
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
|
||||
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
|
||||
// for(int i=0; i<comp.length; i++) {
|
||||
// System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);
|
||||
// }
|
||||
|
||||
// First up, check the round tripping
|
||||
// assertEquals(632, decomp.length);
|
||||
for(int i=0; i<decomp.length; i++) {
|
||||
assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
|
||||
}
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] decomp2 = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
|
||||
assertArrayEquals(testTrailerDecomp, decomp2);
|
||||
|
||||
|
||||
// Now check the compressed intermediate version
|
||||
assertEquals(339, comp.length);
|
||||
for(int i=0; i<comp.length; i++) {
|
||||
assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
|
||||
}
|
||||
}
|
||||
// Compress it using our engine
|
||||
byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
|
||||
|
||||
// the compressed binary differs, as the run length searching finds different results
|
||||
// but the decompressed data is the same
|
||||
|
||||
// Now decompress it again
|
||||
byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
|
||||
|
||||
assertArrayEquals(testTrailerDecomp, decomp);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue