upgrade to lzf compress 0.9
This commit is contained in:
parent 27a7b0680c
commit 4bbf29834e
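Before the file-by-file hunks, a minimal round-trip sketch of the 0.9-style API this commit introduces (the sample input is made up; the classes and signatures are the ones added in the hunks below):

import org.elasticsearch.common.compress.lzf.ChunkDecoder;
import org.elasticsearch.common.compress.lzf.LZFEncoder;
import org.elasticsearch.common.compress.lzf.util.ChunkDecoderFactory;

public class LzfRoundTrip {
    public static void main(String[] args) throws Exception {
        // hypothetical sample input; repetitive data compresses well
        byte[] original = "hello hello hello hello hello".getBytes("UTF-8");
        byte[] compressed = LZFEncoder.encode(original, original.length);
        // pick the fastest decoder available (Unsafe-based if loadable, else vanilla)
        ChunkDecoder decoder = ChunkDecoderFactory.optimalInstance();
        byte[] restored = decoder.decode(compressed);
        System.out.println(new String(restored, "UTF-8"));
    }
}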
ChunkDecoder.java (new file)
@@ -0,0 +1,228 @@
package org.elasticsearch.common.compress.lzf;

import java.io.IOException;
import java.io.InputStream;

/**
 * Decoder that handles decoding of sequence of encoded LZF chunks,
 * combining them into a single contiguous result byte array.
 *
 * @author Tatu Saloranta (tatu@ning.com)
 * @since 0.9
 */
public abstract class ChunkDecoder {
    protected final static byte BYTE_NULL = 0;
    protected final static int HEADER_BYTES = 5;

    public ChunkDecoder() {
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Method for decompressing a block of input data encoded in LZF
     * block structure (compatible with the lzf command line utility);
     * input may consist of any number of blocks.
     * Note that input MUST consist of a sequence of one or more complete
     * chunks; partial chunks cannot be handled.
     */
    public final byte[] decode(final byte[] inputBuffer) throws IOException {
        byte[] result = new byte[calculateUncompressedSize(inputBuffer, 0, inputBuffer.length)];
        decode(inputBuffer, 0, inputBuffer.length, result);
        return result;
    }

    /**
     * Method for decompressing a block of input data encoded in LZF
     * block structure (compatible with the lzf command line utility);
     * input may consist of any number of blocks.
     * Note that input MUST consist of a sequence of one or more complete
     * chunks; partial chunks cannot be handled.
     */
    public final byte[] decode(final byte[] inputBuffer, int inputPtr, int inputLen) throws IOException {
        byte[] result = new byte[calculateUncompressedSize(inputBuffer, inputPtr, inputLen)];
        decode(inputBuffer, inputPtr, inputLen, result);
        return result;
    }

    /**
     * Method for decompressing a block of input data encoded in LZF
     * block structure (compatible with the lzf command line utility);
     * input may consist of any number of blocks.
     * Note that input MUST consist of a sequence of one or more complete
     * chunks; partial chunks cannot be handled.
     */
    public final int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws IOException {
        return decode(inputBuffer, 0, inputBuffer.length, targetBuffer);
    }

    /**
     * Method for decompressing a block of input data encoded in LZF
     * block structure (compatible with the lzf command line utility);
     * input may consist of any number of blocks.
     * Note that input MUST consist of a sequence of one or more complete
     * chunks; partial chunks cannot be handled.
     */
    public int decode(final byte[] sourceBuffer, int inPtr, int inLength,
                      final byte[] targetBuffer) throws IOException {
        byte[] result = targetBuffer;
        int outPtr = 0;
        int blockNr = 0;

        final int end = inPtr + inLength - 1; // -1 to offset possible end marker

        while (inPtr < end) {
            // let's do basic sanity checks; no point in skimping with these checks
            if (sourceBuffer[inPtr] != LZFChunk.BYTE_Z || sourceBuffer[inPtr + 1] != LZFChunk.BYTE_V) {
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + inPtr + "): did not start with 'ZV' signature bytes");
            }
            inPtr += 2;
            int type = sourceBuffer[inPtr++];
            int len = uint16(sourceBuffer, inPtr);
            inPtr += 2;
            if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
                System.arraycopy(sourceBuffer, inPtr, result, outPtr, len);
                outPtr += len;
            } else { // compressed
                int uncompLen = uint16(sourceBuffer, inPtr);
                inPtr += 2;
                decodeChunk(sourceBuffer, inPtr, result, outPtr, outPtr + uncompLen);
                outPtr += uncompLen;
            }
            inPtr += len;
            ++blockNr;
        }
        return outPtr;
    }

    /**
     * Main decode from a stream. Decompressed bytes are placed in the outputBuffer;
     * inputBuffer is a "scratch-area".
     *
     * @param is           An input stream of LZF compressed bytes
     * @param inputBuffer  A byte array used as a scratch area.
     * @param outputBuffer A byte array in which the result is returned
     * @return The number of bytes placed in the outputBuffer.
     */
    public abstract int decodeChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer)
            throws IOException;

    /**
     * Main decode method for individual chunks.
     */
    public abstract void decodeChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd)
            throws IOException;

    /*
    ///////////////////////////////////////////////////////////////////////
    // Public static methods
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Helper method that will calculate total uncompressed size, for sequence of
     * one or more LZF blocks stored in given byte array.
     * Will do basic sanity checking, so that this method can be called to
     * verify against some types of corruption.
     */
    public static int calculateUncompressedSize(byte[] data, int ptr, int length) throws IOException {
        int uncompressedSize = 0;
        int blockNr = 0;
        final int end = ptr + length;

        while (ptr < end) {
            // can use optional end marker
            if (ptr == (data.length + 1) && data[ptr] == BYTE_NULL) {
                ++ptr; // so that we'll be at end
                break;
            }
            // simpler to handle bounds checks by catching exception here...
            try {
                if (data[ptr] != LZFChunk.BYTE_Z || data[ptr + 1] != LZFChunk.BYTE_V) {
                    throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): did not start with 'ZV' signature bytes");
                }
                int type = (int) data[ptr + 2];
                int blockLen = uint16(data, ptr + 3);
                if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
                    ptr += 5;
                    uncompressedSize += blockLen;
                } else if (type == LZFChunk.BLOCK_TYPE_COMPRESSED) { // compressed
                    uncompressedSize += uint16(data, ptr + 5);
                    ptr += 7;
                } else { // unknown... CRC-32 would be 2, but that's not implemented by cli tool
                    throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): unrecognized block type " + (type & 0xFF));
                }
                ptr += blockLen;
            } catch (ArrayIndexOutOfBoundsException e) {
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): truncated block header");
            }
            ++blockNr;
        }
        // one more sanity check:
        if (ptr != data.length) {
            throw new IOException("Corrupt input data: block #" + blockNr + " extends " + (data.length - ptr) + " beyond end of input");
        }
        return uncompressedSize;
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Internal methods
    ///////////////////////////////////////////////////////////////////////
     */

    protected final static int uint16(byte[] data, int ptr) {
        return ((data[ptr] & 0xFF) << 8) + (data[ptr + 1] & 0xFF);
    }

    /**
     * Helper method to forcibly load header bytes that must be read before
     * chunk can be handled.
     */
    protected final static int readHeader(final InputStream is, final byte[] inputBuffer)
            throws IOException {
        // Ok: simple case first, where we just get all data we need
        int needed = HEADER_BYTES;
        int count = is.read(inputBuffer, 0, needed);

        if (count == needed) {
            return count;
        }
        if (count <= 0) {
            return 0;
        }

        // if not, a source that trickles data (network etc); must loop
        int offset = count;
        needed -= count;

        do {
            count = is.read(inputBuffer, offset, needed);
            if (count <= 0) {
                break;
            }
            offset += count;
            needed -= count;
        } while (needed > 0);
        return offset;
    }

    protected final static void readFully(InputStream is, boolean compressed,
                                          byte[] outputBuffer, int offset, int len) throws IOException {
        int left = len;
        while (left > 0) {
            int count = is.read(outputBuffer, offset, left);
            if (count < 0) { // EOF not allowed here
                throw new IOException("EOF in " + len + " byte ("
                        + (compressed ? "" : "un") + "compressed) block: could only read "
                        + (len - left) + " bytes");
            }
            offset += count;
            left -= count;
        }
    }
}
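The 5-byte chunk header this class parses ('Z', 'V', type byte, 2-byte big-endian length) is easy to inspect by hand. A small sketch using the same arithmetic as uint16() above; the concrete block-type values (0 for non-compressed, 1 for compressed) are an assumption consistent with the "CRC-32 would be 2" comment in calculateUncompressedSize:

    // Sketch: inspect one LZF chunk header (layout per ChunkDecoder above).
    static void describeChunk(byte[] data, int ptr) {
        if (data[ptr] != 'Z' || data[ptr + 1] != 'V') { // LZFChunk.BYTE_Z / BYTE_V
            throw new IllegalArgumentException("not an LZF chunk at offset " + ptr);
        }
        int type = data[ptr + 2]; // 0 = non-compressed, 1 = compressed (assumed values)
        int len = ((data[ptr + 3] & 0xFF) << 8) + (data[ptr + 4] & 0xFF); // same math as uint16()
        System.out.println("type=" + type + ", payload=" + len + " bytes");
    }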
ChunkEncoder.java
@@ -20,7 +20,7 @@ import java.io.OutputStream;
  * is only used if it actually reduces chunk size (including overhead
  * of additional header bytes)
  *
- * @author tatu@ning.com
+ * @author Tatu Saloranta (tatu@ning.com)
  */
 public class ChunkEncoder {
     // Beyond certain point we won't be able to compress; let's use 16 bytes as cut-off
@@ -38,6 +38,10 @@ public class ChunkEncoder {
 
     private final BufferRecycler _recycler;
 
+    /**
+     * Hash table contains lookup based on 3-byte sequence; key is hash
+     * of such triplet, value is offset in buffer.
+     */
     private int[] _hashTable;
 
     private final int _hashModulo;
@@ -78,7 +82,7 @@ public class ChunkEncoder {
     /**
      * Method to close once encoder is no longer in use. Note: after calling
-     * this method, further calls to {@link #_encodeChunk} will fail
+     * this method, further calls to {@link #encodeChunk} will fail
      */
     public void close() {
         byte[] buf = _encodeBuffer;
@@ -177,7 +181,7 @@ public class ChunkEncoder {
     private int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos) {
         final int[] hashTable = _hashTable;
         ++outPos;
-        int hash = first(in, 0);
+        int seen = first(in, 0); // past 4 bytes we have seen... (last one is LSB)
         int literals = 0;
         inEnd -= 4;
         final int firstPos = inPos; // so that we won't have back references across block boundary
@@ -185,18 +189,18 @@ public class ChunkEncoder {
         while (inPos < inEnd) {
             byte p2 = in[inPos + 2];
             // next
-            hash = (hash << 8) + (p2 & 255);
-            int off = hash(hash);
+            seen = (seen << 8) + (p2 & 255);
+            int off = hash(seen);
             int ref = hashTable[off];
             hashTable[off] = inPos;
 
             // First expected common case: no back-ref (for whatever reason)
             if (ref >= inPos // can't refer forward (i.e. leftovers)
                     || ref < firstPos // or to previous block
-                    || (off = inPos - ref - 1) >= MAX_OFF
+                    || (off = inPos - ref) > MAX_OFF
                     || in[ref + 2] != p2 // must match hash
-                    || in[ref + 1] != (byte) (hash >> 8)
-                    || in[ref] != (byte) (hash >> 16)) {
+                    || in[ref + 1] != (byte) (seen >> 8)
+                    || in[ref] != (byte) (seen >> 16)) {
                 out[outPos++] = in[inPos++];
                 literals++;
                 if (literals == LZFChunk.MAX_LITERAL) {
@@ -222,6 +226,7 @@ public class ChunkEncoder {
                 len++;
             }
             len -= 2;
+            --off; // was off by one earlier
             if (len < 7) {
                 out[outPos++] = (byte) ((off >> 8) + (len << 5));
             } else {
@@ -231,19 +236,20 @@ public class ChunkEncoder {
             out[outPos++] = (byte) off;
             outPos++;
             inPos += len;
-            hash = first(in, inPos);
-            hash = (hash << 8) + (in[inPos + 2] & 255);
-            hashTable[hash(hash)] = inPos++;
-            hash = (hash << 8) + (in[inPos + 2] & 255); // hash = next(hash, in, inPos);
-            hashTable[hash(hash)] = inPos++;
+            seen = first(in, inPos);
+            seen = (seen << 8) + (in[inPos + 2] & 255);
+            hashTable[hash(seen)] = inPos;
+            ++inPos;
+            seen = (seen << 8) + (in[inPos + 2] & 255); // hash = next(hash, in, inPos);
+            hashTable[hash(seen)] = inPos;
+            ++inPos;
         }
-        inEnd += 4;
         // try offlining the tail
-        return tryCompressTail(in, inPos, inEnd, out, outPos, literals);
+        return handleTail(in, inPos, inEnd + 4, out, outPos, literals);
     }
 
-    private int tryCompressTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos,
+    private int handleTail(byte[] in, int inPos, int inEnd, byte[] out, int outPos,
             int literals) {
         while (inPos < inEnd) {
             out[outPos++] = in[inPos++];
             literals++;
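A note on these hunks: the hash variable is renamed to seen because it holds a rolling window of input bytes (its low 24 bits are the current 3-byte sequence being hashed), and the off-by-one in the back-reference offset is consolidated into a single --off adjustment. A standalone sketch of the rolling update; the priming of the leading bytes (first() in the real code) and the bucket hash here are assumptions, not ChunkEncoder's actual hash():

    // Illustrative only: mirrors the "seen" bookkeeping in tryCompress above.
    static void indexTriplets(byte[] in, int[] hashTable) { // hashTable.length >= 4096 assumed
        int seen = ((in[0] & 0xFF) << 8) | (in[1] & 0xFF);  // assumed stand-in for first(in, 0)
        for (int pos = 0; pos + 2 < in.length; ++pos) {
            seen = (seen << 8) + (in[pos + 2] & 0xFF);      // low 24 bits = in[pos..pos + 2]
            int bucket = (seen * 0x9E3779B1) >>> 20;        // hypothetical hash onto 4096 buckets
            hashTable[bucket] = pos;                        // latest offset where this triplet occurred
        }
    }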
LZFDecoder.java
@@ -11,383 +11,45 @@
The pre-0.9 static implementation is deleted here: the decode loop, the unrolled decompressChunk methods, and the uint16/readHeader/readFully helpers were removed from this class, and identical code now lives in ChunkDecoder and VanillaChunkDecoder, shown elsewhere in this commit. What remains is a thin facade of static wrappers:

package org.elasticsearch.common.compress.lzf;

import org.elasticsearch.common.compress.lzf.util.ChunkDecoderFactory;

import java.io.IOException;

/**
 * Decoder that handles decoding of sequence of encoded LZF chunks,
 * combining them into a single contiguous result byte array.
 * As of version 0.9, this class has been mostly replaced by
 * {@link ChunkDecoder}, although static methods are left here
 * and may still be used.
 * All static methods use {@link ChunkDecoderFactory#optimalInstance}
 * to find actual {@link ChunkDecoder} instance to use.
 *
 * @author Tatu Saloranta (tatu@ning.com)
 */
public class LZFDecoder {
    /*
    ///////////////////////////////////////////////////////////////////////
    // Old API
    ///////////////////////////////////////////////////////////////////////
     */

    public static byte[] decode(final byte[] inputBuffer) throws IOException {
        return decode(inputBuffer, 0, inputBuffer.length);
    }

    public static byte[] decode(final byte[] inputBuffer, int inputPtr, int inputLen) throws IOException {
        return ChunkDecoderFactory.optimalInstance().decode(inputBuffer);
    }

    public static int decode(final byte[] inputBuffer, final byte[] targetBuffer) throws IOException {
        return decode(inputBuffer, 0, inputBuffer.length, targetBuffer);
    }

    public static int decode(final byte[] sourceBuffer, int inPtr, int inLength, final byte[] targetBuffer) throws IOException {
        return ChunkDecoderFactory.optimalInstance().decode(sourceBuffer, inPtr, inLength, targetBuffer);
    }

    public static int calculateUncompressedSize(byte[] data, int ptr, int length) throws IOException {
        return ChunkDecoder.calculateUncompressedSize(data, ptr, length);
    }
}
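The facade keeps pre-0.9 call sites compiling unchanged; for example (compressed is assumed to hold a valid LZF chunk sequence):

    int size = LZFDecoder.calculateUncompressedSize(compressed, 0, compressed.length);
    byte[] target = new byte[size];
    int written = LZFDecoder.decode(compressed, target);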
LZFEncoder.java
@@ -35,8 +35,19 @@ public class LZFEncoder {
      * Result consists of a sequence of chunks.
      */
     public static byte[] encode(byte[] data, int length) throws IOException {
+        return encode(data, 0, length);
+    }
+
+    /**
+     * Method for compressing given input data using LZF encoding and
+     * block structure (compatible with lzf command line utility).
+     * Result consists of a sequence of chunks.
+     *
+     * @since 0.8.1
+     */
+    public static byte[] encode(byte[] data, int offset, int length) throws IOException {
         ChunkEncoder enc = new ChunkEncoder(length, BufferRecycler.instance());
-        byte[] result = encode(enc, data, length);
+        byte[] result = encode(enc, data, offset, length);
         // important: may be able to reuse buffers
         enc.close();
         return result;
@@ -44,9 +55,17 @@ public class LZFEncoder {
 
     public static byte[] encode(ChunkEncoder enc, byte[] data, int length)
             throws IOException {
+        return encode(enc, data, 0, length);
+    }
+
+    /**
+     * @since 0.8.1
+     */
+    public static byte[] encode(ChunkEncoder enc, byte[] data, int offset, int length)
+            throws IOException {
         int left = length;
         int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left);
-        LZFChunk first = enc.encodeChunk(data, 0, chunkLen);
+        LZFChunk first = enc.encodeChunk(data, offset, chunkLen);
         left -= chunkLen;
         // shortcut: if it all fit in, no need to coalesce:
         if (left < 1) {
@@ -54,13 +73,13 @@ public class LZFEncoder {
         }
         // otherwise need to get other chunks:
         int resultBytes = first.length();
-        int inputOffset = chunkLen;
+        offset += chunkLen;
         LZFChunk last = first;
 
         do {
             chunkLen = Math.min(left, LZFChunk.MAX_CHUNK_LEN);
-            LZFChunk chunk = enc.encodeChunk(data, inputOffset, chunkLen);
-            inputOffset += chunkLen;
+            LZFChunk chunk = enc.encodeChunk(data, offset, chunkLen);
+            offset += chunkLen;
             left -= chunkLen;
             resultBytes += chunk.length();
             last.setNext(chunk);
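The new offset-taking overloads let a caller compress a slice of a larger buffer without copying it out first; inputs longer than LZFChunk.MAX_CHUNK_LEN are split into linked chunks and coalesced by the loop above. A hedged example (buffer contents and slice bounds are made up):

    byte[] buffer = new byte[8192];                           // hypothetical source data
    byte[] compressed = LZFEncoder.encode(buffer, 128, 4096); // compress bytes [128, 128 + 4096)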
UnsafeChunkDecoder.java (new file)
@@ -0,0 +1,243 @@
package org.elasticsearch.common.compress.lzf.impl;

import org.elasticsearch.common.compress.lzf.ChunkDecoder;
import org.elasticsearch.common.compress.lzf.LZFChunk;
import sun.misc.Unsafe;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;

/**
 * Highly optimized {@link ChunkDecoder} implementation that uses
 * Sun JDK's Unsafe class (which may be included by other JDKs as well;
 * IBM's apparently does).
 * <p>
 * Credits for the idea go to Dain Sundstrom, who kindly suggested this use,
 * and is an all-around great source for optimization tips and tricks.
 */
@SuppressWarnings("restriction")
public class UnsafeChunkDecoder extends ChunkDecoder {
    private static final Unsafe unsafe;

    static {
        try {
            Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            unsafe = (Unsafe) theUnsafe.get(null);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static final long BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
    // private static final long SHORT_ARRAY_OFFSET = unsafe.arrayBaseOffset(short[].class);
    // private static final long SHORT_ARRAY_STRIDE = unsafe.arrayIndexScale(short[].class);

    public UnsafeChunkDecoder() {
    }

    @Override
    public final int decodeChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer)
            throws IOException {
        int bytesInOutput;
        /* note: we do NOT read more than 5 bytes because otherwise might need to shuffle bytes
         * for output buffer (could perhaps optimize in future?)
         */
        int bytesRead = readHeader(is, inputBuffer);
        if ((bytesRead < HEADER_BYTES)
                || inputBuffer[0] != LZFChunk.BYTE_Z || inputBuffer[1] != LZFChunk.BYTE_V) {
            if (bytesRead == 0) { // probably fine, clean EOF
                return -1;
            }
            throw new IOException("Corrupt input data, block did not start with 2 byte signature ('ZV') followed by type byte, 2-byte length)");
        }
        int type = inputBuffer[2];
        int compLen = uint16(inputBuffer, 3);
        if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
            readFully(is, false, outputBuffer, 0, compLen);
            bytesInOutput = compLen;
        } else { // compressed
            readFully(is, true, inputBuffer, 0, 2 + compLen); // first 2 bytes are uncompressed length
            int uncompLen = uint16(inputBuffer, 0);
            decodeChunk(inputBuffer, 2, outputBuffer, 0, uncompLen);
            bytesInOutput = uncompLen;
        }
        return bytesInOutput;
    }

    @Override
    public final void decodeChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd)
            throws IOException {
        main_loop:
        do {
            int ctrl = in[inPos++] & 255;
            while (ctrl < LZFChunk.MAX_LITERAL) { // literal run(s)
                copyUpTo32(in, inPos, out, outPos, ctrl);
                ++ctrl;
                inPos += ctrl;
                outPos += ctrl;
                if (outPos >= outEnd) {
                    break main_loop;
                }
                ctrl = in[inPos++] & 255;
            }
            // back reference
            int len = ctrl >> 5;
            ctrl = -((ctrl & 0x1f) << 8) - 1;
            // short back reference? 2 bytes; run lengths of 2 - 8 bytes
            if (len < 7) {
                ctrl -= in[inPos++] & 255;
                if (ctrl < -7) { // non-overlapping? can use efficient bulk copy
                    copyLong(out, outPos + ctrl, out, outPos);
                    outPos += len + 2;
                    continue;
                }
                // otherwise, byte-by-byte
                outPos = copyOverlappingShort(out, outPos, ctrl, len);
                continue;
            }
            // long back reference: 3 bytes, length of up to 264 bytes
            len = in[inPos++] & 255;
            ctrl -= in[inPos++] & 255;
            // First: overlapping case can't use default handling, off line:
            if ((ctrl + len) >= -9) {
                outPos = copyOverlappingLong(out, outPos, ctrl, len);
                continue;
            }
            // but non-overlapping is simple
            len += 9;
            if (len <= 32) {
                copyUpTo32(out, outPos + ctrl, out, outPos, len - 1);
            } else {
                System.arraycopy(out, outPos + ctrl, out, outPos, len);
            }
            outPos += len;
        } while (outPos < outEnd);

        // sanity check to guard against corrupt data:
        if (outPos != outEnd)
            throw new IOException("Corrupt data: overrun in decompress, input offset " + inPos + ", output offset " + outPos);
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Internal methods
    ///////////////////////////////////////////////////////////////////////
     */

    private final int copyOverlappingShort(final byte[] out, int outPos, final int offset, int len) {
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        switch (len) {
            case 6: out[outPos] = out[outPos++ + offset];
            case 5: out[outPos] = out[outPos++ + offset];
            case 4: out[outPos] = out[outPos++ + offset];
            case 3: out[outPos] = out[outPos++ + offset];
            case 2: out[outPos] = out[outPos++ + offset];
            case 1: out[outPos] = out[outPos++ + offset];
        }
        return outPos;
    }

    private final static int copyOverlappingLong(final byte[] out, int outPos, final int offset, int len) {
        // otherwise manual copy: so first just copy 9 bytes we know are needed
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];
        out[outPos] = out[outPos++ + offset];

        // then loop
        // Odd: after extensive profiling, looks like magic number
        // for unrolling is 4: with 8 performance is worse (even
        // bit less than with no unrolling).
        len += outPos;
        final int end = len - 3;
        while (outPos < end) {
            out[outPos] = out[outPos++ + offset];
            out[outPos] = out[outPos++ + offset];
            out[outPos] = out[outPos++ + offset];
            out[outPos] = out[outPos++ + offset];
        }
        switch (len - outPos) {
            case 3: out[outPos] = out[outPos++ + offset];
            case 2: out[outPos] = out[outPos++ + offset];
            case 1: out[outPos] = out[outPos++ + offset];
        }
        return outPos;
    }

    private final static void copyLong(byte[] src, int srcIndex, byte[] dest, int destIndex) {
        long value = unsafe.getLong(src, BYTE_ARRAY_OFFSET + srcIndex);
        unsafe.putLong(dest, (BYTE_ARRAY_OFFSET + destIndex), value);
    }

    private final static void copyUpTo32(byte[] in, int inputIndex, byte[] out, int outputIndex, int lengthMinusOne) {
        if ((outputIndex + 32) > out.length) {
            System.arraycopy(in, inputIndex, out, outputIndex, lengthMinusOne + 1);
            return;
        }
        long inPtr = BYTE_ARRAY_OFFSET + inputIndex;
        long outPtr = BYTE_ARRAY_OFFSET + outputIndex;

        switch (lengthMinusOne >>> 3) {
            case 3: {
                long value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
            }
            break;
            case 2: {
                long value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
            }
            break;
            case 1: {
                long value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
                inPtr += 8;
                outPtr += 8;
                value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
            }
            break;
            case 0: {
                long value = unsafe.getLong(in, inPtr);
                unsafe.putLong(out, outPtr, value);
            }
        }
    }
}
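Why the ctrl < -7 guard permits a single unconditional 8-byte copy: a short back reference copies at most len + 2 == 8 bytes, so as long as the source window starts at least 8 bytes behind the destination the two windows cannot overlap. A portable stand-in for the Unsafe-based copyLong above (a sketch only; the real method relies on sun.misc.Unsafe for speed):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    class PortableCopy {
        // Moves 8 bytes at once within one array; valid only when the
        // source and destination 8-byte windows do not overlap.
        static void copyLong(byte[] buf, int srcIndex, int destIndex) {
            ByteBuffer bb = ByteBuffer.wrap(buf).order(ByteOrder.nativeOrder());
            bb.putLong(destIndex, bb.getLong(srcIndex));
        }
    }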
VanillaChunkDecoder.java (new file)
@@ -0,0 +1,274 @@
package org.elasticsearch.common.compress.lzf.impl;

import org.elasticsearch.common.compress.lzf.ChunkDecoder;
import org.elasticsearch.common.compress.lzf.LZFChunk;

import java.io.IOException;
import java.io.InputStream;

/**
 * Safe {@link ChunkDecoder} implementation that can be used on any
 * platform.
 */
public class VanillaChunkDecoder extends ChunkDecoder {
    public VanillaChunkDecoder() {
    }

    @Override
    public final int decodeChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer)
            throws IOException {
        int bytesInOutput;
        /* note: we do NOT read more than 5 bytes because otherwise might need to shuffle bytes
         * for output buffer (could perhaps optimize in future?)
         */
        int bytesRead = readHeader(is, inputBuffer);
        if ((bytesRead < HEADER_BYTES)
                || inputBuffer[0] != LZFChunk.BYTE_Z || inputBuffer[1] != LZFChunk.BYTE_V) {
            if (bytesRead == 0) { // probably fine, clean EOF
                return -1;
            }
            throw new IOException("Corrupt input data, block did not start with 2 byte signature ('ZV') followed by type byte, 2-byte length)");
        }
        int type = inputBuffer[2];
        int compLen = uint16(inputBuffer, 3);
        if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
            readFully(is, false, outputBuffer, 0, compLen);
            bytesInOutput = compLen;
        } else { // compressed
            readFully(is, true, inputBuffer, 0, 2 + compLen); // first 2 bytes are uncompressed length
            int uncompLen = uint16(inputBuffer, 0);
            decodeChunk(inputBuffer, 2, outputBuffer, 0, uncompLen);
            bytesInOutput = uncompLen;
        }
        return bytesInOutput;
    }

    @Override
    public final void decodeChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd)
            throws IOException {
        do {
            int ctrl = in[inPos++] & 255;
            if (ctrl < LZFChunk.MAX_LITERAL) { // literal run, unrolled fall-through copy
                switch (ctrl) {
                    case 31: out[outPos++] = in[inPos++];
                    case 30: out[outPos++] = in[inPos++];
                    case 29: out[outPos++] = in[inPos++];
                    case 28: out[outPos++] = in[inPos++];
                    case 27: out[outPos++] = in[inPos++];
                    case 26: out[outPos++] = in[inPos++];
                    case 25: out[outPos++] = in[inPos++];
                    case 24: out[outPos++] = in[inPos++];
                    case 23: out[outPos++] = in[inPos++];
                    case 22: out[outPos++] = in[inPos++];
                    case 21: out[outPos++] = in[inPos++];
                    case 20: out[outPos++] = in[inPos++];
                    case 19: out[outPos++] = in[inPos++];
                    case 18: out[outPos++] = in[inPos++];
                    case 17: out[outPos++] = in[inPos++];
                    case 16: out[outPos++] = in[inPos++];
                    case 15: out[outPos++] = in[inPos++];
                    case 14: out[outPos++] = in[inPos++];
                    case 13: out[outPos++] = in[inPos++];
                    case 12: out[outPos++] = in[inPos++];
                    case 11: out[outPos++] = in[inPos++];
                    case 10: out[outPos++] = in[inPos++];
                    case 9: out[outPos++] = in[inPos++];
                    case 8: out[outPos++] = in[inPos++];
                    case 7: out[outPos++] = in[inPos++];
                    case 6: out[outPos++] = in[inPos++];
                    case 5: out[outPos++] = in[inPos++];
                    case 4: out[outPos++] = in[inPos++];
                    case 3: out[outPos++] = in[inPos++];
                    case 2: out[outPos++] = in[inPos++];
                    case 1: out[outPos++] = in[inPos++];
                    case 0: out[outPos++] = in[inPos++];
                }
                continue;
            }
            // back reference
            int len = ctrl >> 5;
            ctrl = -((ctrl & 0x1f) << 8) - 1;
            if (len < 7) { // 2 bytes; length of 3 - 8 bytes
                ctrl -= in[inPos++] & 255;
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
                switch (len) {
                    case 6: out[outPos] = out[outPos++ + ctrl];
                    case 5: out[outPos] = out[outPos++ + ctrl];
                    case 4: out[outPos] = out[outPos++ + ctrl];
                    case 3: out[outPos] = out[outPos++ + ctrl];
                    case 2: out[outPos] = out[outPos++ + ctrl];
                    case 1: out[outPos] = out[outPos++ + ctrl];
                }
                continue;
            }

            // long version (3 bytes, length of up to 264 bytes)
            len = in[inPos++] & 255;
            ctrl -= in[inPos++] & 255;

            // First: if there is no overlap, can just use arraycopy:
            if ((ctrl + len) < -9) {
                len += 9;
                if (len <= 32) {
                    copyUpTo32WithSwitch(out, outPos + ctrl, out, outPos, len - 1);
                } else {
                    System.arraycopy(out, outPos + ctrl, out, outPos, len);
                }
                outPos += len;
                continue;
            }

            // otherwise manual copy: so first just copy 9 bytes we know are needed
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];

            // then loop
            // Odd: after extensive profiling, looks like magic number
            // for unrolling is 4: with 8 performance is worse (even
            // bit less than with no unrolling).
            len += outPos;
            final int end = len - 3;
            while (outPos < end) {
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
            }
            switch (len - outPos) {
                case 3: out[outPos] = out[outPos++ + ctrl];
                case 2: out[outPos] = out[outPos++ + ctrl];
                case 1: out[outPos] = out[outPos++ + ctrl];
            }
        } while (outPos < outEnd);

        // sanity check to guard against corrupt data:
        if (outPos != outEnd)
            throw new IOException("Corrupt data: overrun in decompress, input offset " + inPos + ", output offset " + outPos);
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Internal methods
    ///////////////////////////////////////////////////////////////////////
     */

    protected static final void copyUpTo32WithSwitch(byte[] in, int inPos, byte[] out, int outPos,
                                                     int lengthMinusOne) {
        switch (lengthMinusOne) {
            case 31: out[outPos++] = in[inPos++];
            case 30: out[outPos++] = in[inPos++];
            case 29: out[outPos++] = in[inPos++];
            case 28: out[outPos++] = in[inPos++];
            case 27: out[outPos++] = in[inPos++];
            case 26: out[outPos++] = in[inPos++];
            case 25: out[outPos++] = in[inPos++];
            case 24: out[outPos++] = in[inPos++];
            case 23: out[outPos++] = in[inPos++];
            case 22: out[outPos++] = in[inPos++];
            case 21: out[outPos++] = in[inPos++];
            case 20: out[outPos++] = in[inPos++];
            case 19: out[outPos++] = in[inPos++];
            case 18: out[outPos++] = in[inPos++];
            case 17: out[outPos++] = in[inPos++];
            case 16: out[outPos++] = in[inPos++];
            case 15: out[outPos++] = in[inPos++];
            case 14: out[outPos++] = in[inPos++];
            case 13: out[outPos++] = in[inPos++];
            case 12: out[outPos++] = in[inPos++];
            case 11: out[outPos++] = in[inPos++];
            case 10: out[outPos++] = in[inPos++];
            case 9: out[outPos++] = in[inPos++];
            case 8: out[outPos++] = in[inPos++];
            case 7: out[outPos++] = in[inPos++];
            case 6: out[outPos++] = in[inPos++];
            case 5: out[outPos++] = in[inPos++];
            case 4: out[outPos++] = in[inPos++];
            case 3: out[outPos++] = in[inPos++];
            case 2: out[outPos++] = in[inPos++];
            case 1: out[outPos++] = in[inPos++];
            case 0: out[outPos++] = in[inPos++];
        }
    }
}
@ -0,0 +1,70 @@
package org.elasticsearch.common.compress.lzf.util;

import org.elasticsearch.common.compress.lzf.ChunkDecoder;
import org.elasticsearch.common.compress.lzf.impl.UnsafeChunkDecoder;
import org.elasticsearch.common.compress.lzf.impl.VanillaChunkDecoder;

/**
 * Simple helper class used for loading
 * {@link ChunkDecoder} implementations, based on criteria
 * such as "fastest available".
 * <p>
 * Yes, it looks butt-ugly, but does the job. Nonetheless, if anyone
 * has lipstick for this pig, let me know.
 *
 * @since 0.9
 */
public class ChunkDecoderFactory {
    private final static ChunkDecoderFactory _instance;

    static {
        Class<?> impl = null;
        try {
            // first, try loading optimal one, which uses Sun JDK Unsafe...
            impl = (Class<?>) Class.forName(UnsafeChunkDecoder.class.getName());
        } catch (Throwable t) {
        }
        if (impl == null) {
            impl = VanillaChunkDecoder.class;
        }
        _instance = new ChunkDecoderFactory(impl);
    }

    private final Class<? extends ChunkDecoder> _implClass;

    @SuppressWarnings("unchecked")
    private ChunkDecoderFactory(Class<?> imp) {
        _implClass = (Class<? extends ChunkDecoder>) imp;
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Public API
    ///////////////////////////////////////////////////////////////////////
    */

    /**
     * Method to use for getting decompressor instance that uses the most optimal
     * available methods for underlying data access. It should be safe to call
     * this method as implementations are dynamically loaded; however, on some
     * non-standard platforms it may be necessary to either directly load
     * instances, or use {@link #safeInstance()}.
     */
    public static ChunkDecoder optimalInstance() {
        try {
            return _instance._implClass.newInstance();
        } catch (Exception e) {
            throw new IllegalStateException("Failed to load a ChunkDecoder instance (" + e.getClass().getName() + "): "
                    + e.getMessage(), e);
        }
    }

    /**
     * Method that can be used to ensure that a "safe" decompressor instance is loaded.
     * Safe here means that it should work on any and all Java platforms.
     */
    public static ChunkDecoder safeInstance() {
        // this will always succeed loading; no need to use dynamic class loading or instantiation
        return new VanillaChunkDecoder();
    }
}
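A brief usage sketch for the factory. The decode(byte[]) call is the whole-buffer API on ChunkDecoder added in this commit; the wrapper class and variable names below are illustrative:

    import org.elasticsearch.common.compress.lzf.ChunkDecoder;
    import org.elasticsearch.common.compress.lzf.util.ChunkDecoderFactory;

    import java.io.IOException;

    public class LzfDecodeExample {
        public static byte[] decompress(byte[] lzfChunks) throws IOException {
            // optimalInstance() returns an UnsafeChunkDecoder where sun.misc.Unsafe
            // is usable, and a VanillaChunkDecoder everywhere else.
            ChunkDecoder decoder = ChunkDecoderFactory.optimalInstance();
            return decoder.decode(lzfChunks);
        }
    }

Code that must run on non-Sun JVMs can call safeInstance() instead and skip the reflective load entirely.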
@ -20,8 +20,9 @@
 package org.elasticsearch.common.io.stream;
 
 import org.elasticsearch.common.compress.lzf.BufferRecycler;
+import org.elasticsearch.common.compress.lzf.ChunkDecoder;
 import org.elasticsearch.common.compress.lzf.LZFChunk;
-import org.elasticsearch.common.compress.lzf.LZFDecoder;
+import org.elasticsearch.common.compress.lzf.util.ChunkDecoderFactory;
 
 import java.io.EOFException;
 import java.io.IOException;
@ -30,6 +31,14 @@ import java.io.IOException;
  * @author kimchy (shay.banon)
  */
 public class LZFStreamInput extends StreamInput {
+    /**
+     * Underlying decoder in use.
+     */
+    private final ChunkDecoder _decoder;
+
+    /**
+     * Object that handles details of buffer recycling
+     */
     private final BufferRecycler _recycler;
 
     /**
@ -49,7 +58,7 @@ public class LZFStreamInput extends StreamInput {
      * but at least one). Default is false, meaning that 'optimal' read
      * is used.
      */
-    protected boolean cfgFullReads = true; // ES: ALWAYS TRUE since we need to throw EOF when doing readBytes
+    protected boolean _cfgFullReads = true; // ES: ALWAYS TRUE since we need to throw EOF when doing readBytes
 
     /* the current buffer of compressed bytes (from which to decode) */
     private byte[] _inputBuffer;
@ -74,6 +83,7 @@ public class LZFStreamInput extends StreamInput {
         } else {
             _recycler = BufferRecycler.instance();
         }
+        _decoder = ChunkDecoderFactory.optimalInstance();
         inputStream = in;
         inputStreamClosed = false;
 
@ -120,7 +130,7 @@ public class LZFStreamInput extends StreamInput {
         System.arraycopy(_decodedBytes, bufferPosition, buffer, offset, chunkLength);
         bufferPosition += chunkLength;
 
-        if (chunkLength == length || !cfgFullReads) {
+        if (chunkLength == length || !_cfgFullReads) {
             return chunkLength;
         }
         // Need more data, then
@ -212,7 +222,7 @@ public class LZFStreamInput extends StreamInput {
         if (inputStreamClosed) {
             return false;
         }
-        bufferLength = LZFDecoder.decompressChunk(inputStream, _inputBuffer, _decodedBytes);
+        bufferLength = _decoder.decodeChunk(inputStream, _inputBuffer, _decodedBytes);
         if (bufferLength < 0) {
             return false;
         }
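The contract this call site relies on, inferred from the hunk rather than from library documentation: decodeChunk(InputStream, byte[], byte[]) reads one complete LZF chunk, staging its compressed bytes in the second argument, decodes it into the third, and returns the decoded length, with a negative value once the stream is exhausted. A caller-side sketch under that assumption, with illustrative buffer sizes and a hypothetical consumer:

    static void readAll(ChunkDecoder decoder, InputStream in) throws IOException {
        byte[] inputBuffer = new byte[LZFChunk.MAX_CHUNK_LEN];  // staging for compressed bytes (size illustrative)
        byte[] decodedBytes = new byte[LZFChunk.MAX_CHUNK_LEN]; // holds one decoded chunk at a time
        int decodedLen;
        while ((decodedLen = decoder.decodeChunk(in, inputBuffer, decodedBytes)) >= 0) {
            handle(decodedBytes, 0, decodedLen); // handle(...) is a hypothetical consumer
        }
    }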
@ -39,6 +39,17 @@ public class LZFStreamOutput extends StreamOutput {
     protected byte[] _outputBuffer;
     protected int _position = 0;
 
+
+    /**
+     * Configuration setting that governs whether basic 'flush()' should
+     * first complete a block or not.
+     * <p>
+     * Default value is 'true'
+     *
+     * @since 0.8
+     */
+    protected boolean _cfgFinishBlockOnFlush = true;
+
     private final boolean neverClose;
 
     public LZFStreamOutput(StreamOutput out, boolean neverClose) {
@ -64,6 +75,10 @@ public class LZFStreamOutput extends StreamOutput {
     }
 
     @Override public void writeBytes(byte[] buffer, int offset, int length) throws IOException {
+        // ES, check if length is 0, and don't write in this case
+        if (length == 0) {
+            return;
+        }
         final int BUFFER_LEN = _outputBuffer.length;
 
         // simple case first: buffering only (for trivially short writes)
@ -96,7 +111,7 @@ public class LZFStreamOutput extends StreamOutput {
 
     @Override
     public void flush() throws IOException {
-        if (_position > 0) {
+        if (_cfgFinishBlockOnFlush && _position > 0) {
             writeCompressedBlock();
         }
         _outputStream.flush();
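With the guard in place, flush() keeps its old behavior under the default _cfgFinishBlockOnFlush = true, while a false setting lets bytes accumulate into full-size chunks across flushes, trading immediate decodability for a better compression ratio. A small sketch using the constructor from the first LZFStreamOutput hunk; the helper method itself is illustrative:

    static void writeAndFlush(StreamOutput rawOut, byte[] payload) throws IOException {
        LZFStreamOutput lzf = new LZFStreamOutput(rawOut, false); // neverClose = false
        lzf.writeBytes(payload, 0, payload.length);
        // Default finish-on-flush: the pending chunk is completed before the
        // underlying flush, so a reader can decode everything written so far.
        lzf.flush();
    }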
@ -104,19 +119,22 @@ public class LZFStreamOutput extends StreamOutput {
 
     @Override
     public void close() throws IOException {
-        flush();
+        if (_position > 0) {
+            writeCompressedBlock();
+        }
         if (neverClose) {
             // just reset here the LZF stream (not the underlying stream, since we might want to read from it)
             _position = 0;
             return;
         }
-        _outputStream.close();
+        _outputStream.flush();
         _encoder.close();
         byte[] buf = _outputBuffer;
         if (buf != null) {
             _outputBuffer = null;
             _recycler.releaseOutputBuffer(buf);
         }
+        _outputStream.close();
     }
 
     @Override public void reset() throws IOException {
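Two behavioral changes are folded into this close() hunk: the pending block is now written directly rather than via flush(), so it is emitted even when _cfgFinishBlockOnFlush is false, and the underlying stream is flushed early but closed last, only after the encoder is closed and the output buffer has been handed back to the recycler.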
@ -143,7 +161,7 @@ public class LZFStreamOutput extends StreamOutput {
 
         do {
             int chunkLen = Math.min(LZFChunk.MAX_CHUNK_LEN, left);
-            _encoder.encodeAndWriteChunk(_outputBuffer, 0, chunkLen, _outputStream);
+            _encoder.encodeAndWriteChunk(_outputBuffer, offset, chunkLen, _outputStream);
             offset += chunkLen;
             left -= chunkLen;
         } while (left > 0);
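The one-token fix above matters as soon as a write spans more than one chunk: with the constant 0, every iteration after the first re-encoded the start of the buffer instead of advancing through it. A standalone sketch of the corrected loop, where encodeAndWrite is a hypothetical stand-in for the real encoder call:

    static void writeChunked(byte[] data, int maxChunkLen) throws IOException {
        int offset = 0;
        int left = data.length;
        do {
            int chunkLen = Math.min(maxChunkLen, left);
            // Must pass `offset`, not 0: each pass consumes the next chunkLen bytes.
            encodeAndWrite(data, offset, chunkLen);
            offset += chunkLen;
            left -= chunkLen;
        } while (left > 0);
    }

    // Hypothetical stand-in for LZFChunkEncoder-style chunk writing.
    static void encodeAndWrite(byte[] data, int offset, int len) throws IOException { /* ... */ }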