remove compressor package, move lzf to a different package

kimchy 2010-08-14 02:25:30 +03:00
parent 0adb2edba2
commit b3afca2589
16 changed files with 405 additions and 612 deletions

View File

@@ -17,22 +17,33 @@
* under the License.
*/
package org.elasticsearch.common.io.compression.lzf;
/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.elasticsearch.common.compress.lzf;
/**
* Class that handles actual encoding of individual chunks.
* Resulting chunks can be compressed or non-compressed; compression
* is only used if it actually reduces chunk size (including overhead
* of additional header bytes)
* <p>
* Code adapted from H2 project (http://www.h2database.com) Java LZF implementation
* by Thomas (which itself was inspired by original C code by Marc A Lehmann)
*
* @author tatu@ning.com
*/
public class ChunkEncoder {
// Beyond certain point we won't be able to compress:
// Beyond certain point we won't be able to compress; let's use 16 bytes as cut-off
private static final int MIN_BLOCK_TO_COMPRESS = 16;
private static final int MIN_HASH_SIZE = 256;
// Not much point in bigger tables, with 8k window
private static final int MAX_HASH_SIZE = 16384;
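As the javadoc notes, compression is only used when it actually wins after header overhead: a non-compressed chunk costs 5 header bytes, a compressed one 7 (the extra two carry the uncompressed length, as the decoder below shows). A minimal sketch of that decision, where tryCompress and asNonCompressedChunk are hypothetical stand-ins for the real ChunkEncoder internals:
// Sketch only; tryCompress/asNonCompressedChunk are hypothetical helpers,
// not the actual ChunkEncoder internals.
byte[] encodeChunk(byte[] in, int offset, int len) {
    if (len >= MIN_BLOCK_TO_COMPRESS) {
        byte[] body = tryCompress(in, offset, len); // null if no gain was found
        // compressed framing costs 2 bytes more than non-compressed framing
        if (body != null && body.length + 2 < len) {
            return body;
        }
    }
    return asNonCompressedChunk(in, offset, len);
}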

View File

@@ -0,0 +1,102 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.elasticsearch.common.compress.lzf;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
/**
* Simple command-line utility that can be used for testing LZF
* compression, or as a rudimentary command-line tool.
* Arguments are the same as used by the "standard" lzf command line tool
*
* @author tatu@ning.com
*/
public class LZF {
public final static String SUFFIX = ".lzf";
void process(String[] args) throws IOException {
if (args.length == 2) {
String oper = args[0];
boolean compress = "-c".equals(oper);
if (compress || "-d".equals(oper)) {
String filename = args[1];
File src = new File(filename);
if (!src.exists()) {
System.err.println("File '" + filename + "' does not exist.");
System.exit(1);
}
if (!compress && !filename.endsWith(SUFFIX)) {
System.err.println("File '" + filename + "' does end with expected suffix ('" + SUFFIX + "', won't decompress.");
System.exit(1);
}
byte[] data = readData(src);
System.out.println("Read " + data.length + " bytes.");
byte[] result = compress ? LZFEncoder.encode(data) : LZFDecoder.decode(data);
System.out.println("Processed into " + result.length + " bytes.");
File resultFile = compress ? new File(filename + SUFFIX) : new File(filename.substring(0, filename.length() - SUFFIX.length()));
FileOutputStream out = new FileOutputStream(resultFile);
out.write(result);
out.close();
System.out.println("Wrote in file '" + resultFile.getAbsolutePath() + "'.");
return;
}
}
System.err.println("Usage: java " + getClass().getName() + " -c/-d file");
System.exit(1);
}
private byte[] readData(File in) throws IOException {
int len = (int) in.length();
byte[] result = new byte[len];
int offset = 0;
FileInputStream fis = new FileInputStream(in);
while (len > 0) {
int count = fis.read(result, offset, len);
if (count < 0) break;
len -= count;
offset += count;
}
fis.close();
if (len > 0) { // should never occur...
throw new IOException("Could not read the whole file -- received EOF when there was " + len + " bytes left to read");
}
return result;
}
public static void main(String[] args) throws IOException {
new LZF().process(args);
}
}
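Rough usage, mirroring the standard lzf tool (file name hypothetical):
java org.elasticsearch.common.compress.lzf.LZF -c data.txt
java org.elasticsearch.common.compress.lzf.LZF -d data.txt.lzf
The first writes data.txt.lzf next to the input; the second strips the .lzf suffix and restores the original file.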

View File

@@ -17,11 +17,24 @@
* under the License.
*/
package org.elasticsearch.common.io.compression.lzf;
/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.elasticsearch.common.compress.lzf;
/**
* Helper class used to store LZF encoded segments (compressed and non-compressed)
* that can be sequenced to produce LZF files/streams.
*
* @author tatu@ning.com
*/
public class LZFChunk {
/**

View File

@@ -17,67 +17,85 @@
* under the License.
*/
package org.elasticsearch.common.io.compression.lzf;
/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.elasticsearch.common.compress.lzf;
import java.io.IOException;
import java.io.InputStream;
/**
* Decoder that handles decoding of sequence of encoded LZF chunks,
* combining them into a single contiguous result byte array
* <p>
* Code adapted from H2 project (http://www.h2database.com) Java LZF implementation
* by Thomas (which itself was inspired by original C code by Marc A Lehmann)
*
* @author tatu@ning.com
*/
public class LZFDecoder {
final static byte BYTE_NULL = 0;
private final static byte BYTE_NULL = 0;
private final static int HEADER_BYTES = 5;
// static methods, no need to instantiate
private LZFDecoder() {
}
public static byte[] decode(final byte[] sourceBuffer) throws IOException {
byte[] result = new byte[calculateUncompressedSize(sourceBuffer)];
decode(sourceBuffer, result);
return result;
}
/**
* Method for decompressing whole input data, which is encoded in LZF
* block structure (compatible with lzf command line utility),
* and can consist of any number of blocks
*/
public static byte[] decode(byte[] data, int length) throws IOException {
public static int decode(final byte[] sourceBuffer, final byte[] targetBuffer) throws IOException {
/* First: let's calculate actual size, so we can allocate
* exact result size. Also useful for basic sanity checking;
* so that after call we know header structure is not corrupt
* (to the degree that lengths etc seem valid)
*/
byte[] result = new byte[calculateUncompressedSize(data, length)];
byte[] result = targetBuffer;
int inPtr = 0;
int outPtr = 0;
while (inPtr < (length - 1)) { // -1 to offset possible end marker
while (inPtr < (sourceBuffer.length - 1)) { // -1 to offset possible end marker
inPtr += 2; // skip 'ZV' marker
int type = data[inPtr++];
int len = uint16(data, inPtr);
int type = sourceBuffer[inPtr++];
int len = uint16(sourceBuffer, inPtr);
inPtr += 2;
if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
System.arraycopy(data, inPtr, result, outPtr, len);
System.arraycopy(sourceBuffer, inPtr, result, outPtr, len);
outPtr += len;
} else { // compressed
int uncompLen = uint16(data, inPtr);
int uncompLen = uint16(sourceBuffer, inPtr);
inPtr += 2;
decompressChunk(data, inPtr, result, outPtr, outPtr + uncompLen);
decompressChunk(sourceBuffer, inPtr, result, outPtr, outPtr + uncompLen);
outPtr += uncompLen;
}
inPtr += len;
}
return result;
return outPtr;
}
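Together with LZFEncoder (changed below), the byte-array API gives a simple in-memory round trip; a minimal, self-contained sketch:
import java.io.IOException;
import java.util.Arrays;
public class LzfRoundTrip {
    public static void main(String[] args) throws IOException {
        byte[] sample = "payload payload payload payload".getBytes("UTF-8");
        byte[] encoded = LZFEncoder.encode(sample);   // chunked 'ZV' output
        byte[] restored = LZFDecoder.decode(encoded); // sized from the chunk headers
        System.out.println(Arrays.equals(sample, restored)); // prints: true
    }
}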
private static int calculateUncompressedSize(byte[] data, int length) throws IOException {
private static int calculateUncompressedSize(byte[] data) throws IOException {
int uncompressedSize = 0;
int ptr = 0;
int blockNr = 0;
while (ptr < length) {
while (ptr < data.length) {
// can use optional end marker
if (ptr == (length + 1) && data[ptr] == BYTE_NULL) {
if (ptr == (data.length - 1) && data[ptr] == BYTE_NULL) {
++ptr; // so that we'll be at end
break;
}
@@ -104,12 +122,48 @@ public class LZFDecoder {
++blockNr;
}
// one more sanity check:
if (ptr != length) {
if (ptr != data.length) {
throw new IOException("Corrupt input data: block #" + blockNr + " extends " + (data.length - ptr) + " beyond end of input");
}
return uncompressedSize;
}
/**
* Main decode from a stream. Decompressed bytes are placed in the outputBuffer, inputBuffer is a "scratch-area".
*
* @param is An input stream of LZF compressed bytes
* @param inputBuffer A byte array used as a scratch area.
* @param outputBuffer A byte array in which the result is returned
* @return The number of bytes placed in the outputBuffer.
*/
public static int decompressChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer)
throws IOException {
int bytesInOutput;
int headerLength = is.read(inputBuffer, 0, HEADER_BYTES);
if (headerLength != HEADER_BYTES) {
return -1;
}
int inPtr = 0;
if (inputBuffer[inPtr] != LZFChunk.BYTE_Z || inputBuffer[inPtr + 1] != LZFChunk.BYTE_V) {
throw new IOException("Corrupt input data, block did not start with 'ZV' signature bytes");
}
inPtr += 2;
int type = inputBuffer[inPtr++];
int len = uint16(inputBuffer, inPtr);
inPtr += 2;
if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
is.read(outputBuffer, 0, len);
bytesInOutput = len;
} else { // compressed
is.read(inputBuffer, inPtr, 2);
int uncompLen = uint16(inputBuffer, inPtr);
is.read(inputBuffer, 0, len);
decompressChunk(inputBuffer, 0, outputBuffer, 0, uncompLen);
bytesInOutput = uncompLen;
}
return bytesInOutput;
}
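This is the primitive the new LZFInputStream (added in this commit) builds on. Driving it directly looks roughly like this, assuming in is an InputStream of LZF data and both buffers are sized to LZFChunk.MAX_CHUNK_LEN:
byte[] scratch = new byte[LZFChunk.MAX_CHUNK_LEN];
byte[] output = new byte[LZFChunk.MAX_CHUNK_LEN];
int decoded;
while ((decoded = LZFDecoder.decompressChunk(in, scratch, output)) != -1) {
    // the first 'decoded' bytes of 'output' hold one uncompressed chunk
}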
/**
* Main decode method for individual chunks.
*/

View File

@@ -17,7 +17,18 @@
* under the License.
*/
package org.elasticsearch.common.io.compression.lzf;
/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.elasticsearch.common.compress.lzf;
import java.io.IOException;
@@ -25,9 +36,8 @@ import java.io.IOException;
* Encoder that handles splitting of input into chunks to encode,
* calls {@link ChunkEncoder} to compress individual chunks and
* combines resulting chunks into contiguous output byte array.
* <p>
* Code adapted from H2 project (http://www.h2database.com) Java LZF implementation
* by Thomas (which itself was inspired by original C code by Marc A Lehmann)
*
* @author tatu@ning.com
*/
public class LZFEncoder {
// Static methods only, no point in instantiating
@@ -35,6 +45,10 @@ public class LZFEncoder {
private LZFEncoder() {
}
public static byte[] encode(byte[] data) throws IOException {
return encode(data, data.length);
}
/**
* Method for compressing given input data using LZF encoding and
* block structure (compatible with lzf command line utility).

View File

@@ -0,0 +1,95 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.compress.lzf;
import java.io.IOException;
import java.io.InputStream;
public class LZFInputStream extends InputStream {
public static int EOF_FLAG = -1;
/* stream to be decompressed */
private final InputStream inputStream;
/* the current buffer of compressed bytes */
private final byte[] compressedBytes = new byte[LZFChunk.MAX_CHUNK_LEN];
/* the buffer of uncompressed bytes from which content is read */
private final byte[] uncompressedBytes = new byte[LZFChunk.MAX_CHUNK_LEN];
/* The current position (next char to output) in the uncompressed bytes buffer. */
private int bufferPosition = 0;
/* Length of the current uncompressed bytes buffer */
private int bufferLength = 0;
public LZFInputStream(final InputStream inputStream) throws IOException {
super();
this.inputStream = inputStream;
}
@Override
public int read() throws IOException {
int returnValue = EOF_FLAG;
readyBuffer();
if (bufferPosition < bufferLength) {
returnValue = (uncompressedBytes[bufferPosition++] & 255);
}
return returnValue;
}
public int read(final byte[] buffer) throws IOException {
return (read(buffer, 0, buffer.length));
}
public int read(final byte[] buffer, final int offset, final int length) throws IOException {
int outputPos = offset;
readyBuffer();
if (bufferLength == -1) {
return -1;
}
while (outputPos < buffer.length && bufferPosition < bufferLength) {
int chunkLength = Math.min(bufferLength - bufferPosition, buffer.length - outputPos);
System.arraycopy(uncompressedBytes, bufferPosition, buffer, outputPos, chunkLength);
outputPos += chunkLength;
bufferPosition += chunkLength;
readyBuffer();
}
return outputPos;
}
public void close() throws IOException {
inputStream.close();
}
/**
* Fill the uncompressed bytes buffer by reading the underlying inputStream.
*
* @throws java.io.IOException
*/
private void readyBuffer() throws IOException {
if (bufferPosition >= bufferLength) {
bufferLength = LZFDecoder.decompressChunk(inputStream, compressedBytes, uncompressedBytes);
bufferPosition = 0;
}
}
}
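A minimal read-side sketch (the file name is hypothetical):
import java.io.FileInputStream;
import java.io.IOException;
public class LzfReadExample {
    public static void main(String[] args) throws IOException {
        LZFInputStream in = new LZFInputStream(new FileInputStream("data.lzf"));
        byte[] buf = new byte[8192];
        int n;
        while ((n = in.read(buf, 0, buf.length)) != -1) {
            // consume the n decompressed bytes now in buf
        }
        in.close();
    }
}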

View File

@@ -0,0 +1,88 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.compress.lzf;
import java.io.IOException;
import java.io.OutputStream;
public class LZFOutputStream extends OutputStream {
private static int OUTPUT_BUFFER_SIZE = LZFChunk.MAX_CHUNK_LEN;
private static int BYTE_MASK = 0xff;
private final OutputStream outputStream;
private byte[] outputBuffer = new byte[OUTPUT_BUFFER_SIZE];
private int position = 0;
public LZFOutputStream(final OutputStream outputStream) {
this.outputStream = outputStream;
}
@Override
public void write(final int singleByte) throws IOException {
if (position >= outputBuffer.length) {
writeCompressedBlock();
}
outputBuffer[position++] = (byte) (singleByte & BYTE_MASK);
}
@Override
public void write(final byte[] buffer, final int offset, final int length) throws IOException {
int inputCursor = offset;
int remainingBytes = length;
while (remainingBytes > 0) {
if (position >= outputBuffer.length) {
writeCompressedBlock();
}
int chunkLength = (remainingBytes > (outputBuffer.length - position)) ? outputBuffer.length - position : remainingBytes;
System.arraycopy(buffer, inputCursor, outputBuffer, position, chunkLength);
position += chunkLength;
remainingBytes -= chunkLength;
inputCursor += chunkLength;
}
}
@Override
public void flush() throws IOException {
try {
writeCompressedBlock();
} finally {
outputStream.flush();
}
}
@Override
public void close() throws IOException {
try {
flush();
} finally {
outputStream.close();
}
}
/**
* Compress and write the current block to the OutputStream
*/
private void writeCompressedBlock() throws IOException {
final byte[] compressedBytes = LZFEncoder.encode(outputBuffer, position);
outputStream.write(compressedBytes);
position = 0;
}
}
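And the matching write side. Note that flush() compresses and emits whatever is buffered as its own chunk, so flushing frequently produces more, smaller chunks. A sketch (file name hypothetical):
import java.io.FileOutputStream;
import java.io.IOException;
public class LzfWriteExample {
    public static void main(String[] args) throws IOException {
        LZFOutputStream out = new LZFOutputStream(new FileOutputStream("data.lzf"));
        for (int i = 0; i < 1000; i++) {
            out.write("some repetitive line\n".getBytes("UTF-8"));
        }
        out.close(); // flushes the final block, then closes the wrapped stream
    }
}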

View File

@@ -1,68 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compression;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import java.io.IOException;
/**
* @author kimchy (Shay Banon)
*/
public class CompressedString implements Streamable {
private byte[] compressedString;
private transient String string;
CompressedString() {
}
public CompressedString(String string) throws IOException {
this.string = string;
this.compressedString = new ZipCompressor().compressString(string);
}
public String string() throws IOException {
if (string != null) {
return string;
}
string = new ZipCompressor().decompressString(compressedString);
return string;
}
public static CompressedString readCompressedString(StreamInput in) throws IOException, ClassNotFoundException {
CompressedString result = new CompressedString();
result.readFrom(in);
return result;
}
@Override public void readFrom(StreamInput in) throws IOException {
compressedString = new byte[in.readVInt()];
in.readFully(compressedString);
}
@Override public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(compressedString.length);
out.writeBytes(compressedString);
}
}

View File

@@ -1,36 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compression;
import java.io.IOException;
/**
* @author kimchy (Shay Banon)
*/
public interface Compressor {
byte[] compress(byte[] value) throws IOException;
byte[] compressString(String value) throws IOException;
byte[] decompress(byte[] value) throws IOException;
String decompressString(byte[] value) throws IOException;
}

View File

@@ -1,115 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compression;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.io.FastByteArrayInputStream;
import org.elasticsearch.common.io.FastByteArrayOutputStream;
import org.elasticsearch.common.thread.ThreadLocals;
import org.elasticsearch.common.unit.ByteSizeUnit;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* @author kimchy (Shay Banon)
*/
public class GZIPCompressor implements Compressor {
private static class Cached {
private static final ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>> cache = new ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>>() {
@Override protected ThreadLocals.CleanableValue<CompressHolder> initialValue() {
return new ThreadLocals.CleanableValue<CompressHolder>(new CompressHolder());
}
};
/**
* Returns the cached thread local byte stream, with its internal stream cleared.
*/
public static CompressHolder cached() {
CompressHolder ch = cache.get().get();
ch.bos.reset();
return ch;
}
}
private static class CompressHolder {
final FastByteArrayOutputStream bos = new FastByteArrayOutputStream();
final byte[] buffer = new byte[(int) ByteSizeUnit.KB.toBytes(5)];
final UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
}
public byte[] compress(byte[] value, int offset, int length) throws IOException {
return compress(value, offset, length, Cached.cached());
}
@Override public byte[] compress(byte[] value) throws IOException {
return compress(value, 0, value.length);
}
@Override public byte[] compressString(String value) throws IOException {
CompressHolder ch = Cached.cached();
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), ch.utf8Result);
return compress(ch.utf8Result.result, 0, ch.utf8Result.length, ch);
}
@Override public byte[] decompress(byte[] value) throws IOException {
CompressHolder ch = Cached.cached();
decompress(value, ch);
return ch.bos.copiedByteArray();
}
@Override public String decompressString(byte[] value) throws IOException {
CompressHolder ch = Cached.cached();
decompress(value, ch);
return Unicode.fromBytes(ch.bos.unsafeByteArray(), 0, ch.bos.size());
}
private static void decompress(byte[] value, CompressHolder ch) throws IOException {
GZIPInputStream in = new GZIPInputStream(new FastByteArrayInputStream(value));
try {
int bytesRead;
while ((bytesRead = in.read(ch.buffer)) != -1) {
ch.bos.write(ch.buffer, 0, bytesRead);
}
ch.bos.flush();
}
finally {
try {
in.close();
}
catch (IOException ex) {
// do nothing
}
}
}
private static byte[] compress(byte[] value, int offset, int length, CompressHolder ch) throws IOException {
GZIPOutputStream os = new GZIPOutputStream(ch.bos);
os.write(value, offset, length);
os.finish();
os.close();
return ch.bos.copiedByteArray();
}
}

View File

@@ -1,70 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compression;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.io.compression.lzf.LZFDecoder;
import org.elasticsearch.common.io.compression.lzf.LZFEncoder;
import org.elasticsearch.common.thread.ThreadLocals;
import java.io.IOException;
/**
* @author kimchy (Shay Banon)
*/
public class LzfCompressor implements Compressor {
private static class Cached {
private static final ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>> cache = new ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>>() {
@Override protected ThreadLocals.CleanableValue<CompressHolder> initialValue() {
return new ThreadLocals.CleanableValue<CompressHolder>(new CompressHolder());
}
};
public static CompressHolder cached() {
return cache.get().get();
}
}
private static class CompressHolder {
final UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
}
@Override public byte[] compress(byte[] value) throws IOException {
return LZFEncoder.encode(value, value.length);
}
@Override public byte[] compressString(String value) throws IOException {
CompressHolder ch = Cached.cached();
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), ch.utf8Result);
return LZFEncoder.encode(ch.utf8Result.result, ch.utf8Result.length);
}
@Override public byte[] decompress(byte[] value) throws IOException {
return LZFDecoder.decode(value, value.length);
}
@Override public String decompressString(byte[] value) throws IOException {
byte[] result = decompress(value);
return Unicode.fromBytes(result, 0, result.length);
}
}

View File

@@ -1,132 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compression;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.io.FastByteArrayOutputStream;
import org.elasticsearch.common.thread.ThreadLocals;
import org.elasticsearch.common.unit.ByteSizeUnit;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
/**
* @author kimchy (Shay Banon)
*/
public class ZipCompressor implements Compressor {
private static class Cached {
private static final ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>> cache = new ThreadLocal<ThreadLocals.CleanableValue<CompressHolder>>() {
@Override protected ThreadLocals.CleanableValue<CompressHolder> initialValue() {
return new ThreadLocals.CleanableValue<CompressHolder>(new CompressHolder());
}
};
/**
* Returns the cached thread local byte stream, with its internal stream cleared.
*/
public static CompressHolder cached() {
CompressHolder ch = cache.get().get();
ch.bos.reset();
return ch;
}
}
private static class CompressHolder {
final FastByteArrayOutputStream bos = new FastByteArrayOutputStream();
final Deflater deflater = new Deflater();
final Inflater inflater = new Inflater();
final byte[] buffer = new byte[(int) ByteSizeUnit.KB.toBytes(5)];
final UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
}
private final int compressionLevel;
public ZipCompressor() {
this(Deflater.BEST_COMPRESSION);
}
public ZipCompressor(int compressionLevel) {
this.compressionLevel = compressionLevel;
}
public byte[] compress(byte[] value, int offset, int length) throws IOException {
return compress(value, offset, length, compressionLevel, Cached.cached());
}
@Override public byte[] compress(byte[] value) throws IOException {
return compress(value, 0, value.length);
}
@Override public byte[] compressString(String value) throws IOException {
CompressHolder ch = Cached.cached();
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), ch.utf8Result);
return compress(ch.utf8Result.result, 0, ch.utf8Result.length, compressionLevel, ch);
}
@Override public byte[] decompress(byte[] value) throws IOException {
CompressHolder ch = Cached.cached();
decompress(value, ch);
return ch.bos.copiedByteArray();
}
@Override public String decompressString(byte[] value) throws IOException {
CompressHolder ch = Cached.cached();
decompress(value, ch);
return Unicode.fromBytes(ch.bos.unsafeByteArray(), 0, ch.bos.size());
}
private static void decompress(byte[] value, CompressHolder ch) throws IOException {
try {
ch.inflater.reset();
ch.inflater.setInput(value);
// Decompress the data
final byte[] buf = ch.buffer;
while (!ch.inflater.finished()) {
int count = ch.inflater.inflate(buf);
ch.bos.write(buf, 0, count);
}
} catch (DataFormatException e) {
throw new IOException("Failed to decompress", e);
} // don't close the inflater, we reuse it...
}
private static byte[] compress(byte[] value, int offset, int length, int compressionLevel, CompressHolder ch) throws IOException {
ch.deflater.reset();
ch.deflater.setLevel(compressionLevel);
ch.deflater.setInput(value, offset, length);
ch.deflater.finish();
// Compress the data
final byte[] buf = ch.buffer;
while (!ch.deflater.finished()) {
int count = ch.deflater.deflate(buf);
ch.bos.write(buf, 0, count);
}
return ch.bos.copiedByteArray();
}
}
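The removed compressors above all share the same per-thread scratch pattern: a ThreadLocal caches buffers (and, for zip, the Deflater/Inflater) so they can be reused without locking or per-call allocation. The idea in isolation, as a minimal sketch with hypothetical names:
// Generic form of the Cached/CompressHolder pattern above.
final class ScratchCache {
    static final class Scratch {
        final byte[] buffer = new byte[5 * 1024];
    }
    private static final ThreadLocal<Scratch> CACHE = new ThreadLocal<Scratch>() {
        @Override protected Scratch initialValue() {
            return new Scratch();
        }
    };
    static Scratch cached() {
        return CACHE.get(); // same instance per thread; caller resets any state
    }
}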

View File

@@ -1,64 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compressor;
import org.elasticsearch.common.io.compression.Compressor;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (Shay Banon)
*/
public abstract class AbstractCompressorTests {
private static final String TEST_STRING = "aaaaaaaaaaaa bbbbbbbbbb aa aa aa cccccccccc";
@Test public void testSimpleOperations() throws Exception {
Compressor compressor = createCompressor();
byte[] compressed = compressor.compressString(TEST_STRING);
System.out.println("" + TEST_STRING.length());
System.out.println("" + compressed.length);
assertThat(compressed.length, lessThan(TEST_STRING.length()));
String decompressed = compressor.decompressString(compressed);
// System.out.println("" + TEST_STRING.length());
// System.out.println("" + compressed.length);
assertThat(decompressed, equalTo(TEST_STRING));
decompressed = compressor.decompressString(compressed);
assertThat(decompressed, equalTo(TEST_STRING));
compressed = compressor.compressString(TEST_STRING);
// System.out.println("" + TEST_STRING.length());
// System.out.println("" + compressed.length);
assertThat(compressed.length, lessThan(TEST_STRING.length()));
decompressed = compressor.decompressString(compressed);
assertThat(decompressed, equalTo(TEST_STRING));
decompressed = compressor.decompressString(compressed);
assertThat(decompressed, equalTo(TEST_STRING));
}
protected abstract Compressor createCompressor();
}

View File

@@ -1,33 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compressor;
import org.elasticsearch.common.io.compression.Compressor;
import org.elasticsearch.common.io.compression.GZIPCompressor;
/**
* @author kimchy (Shay Banon)
*/
public class GZIPCompressorTests extends AbstractCompressorTests {
@Override protected Compressor createCompressor() {
return new GZIPCompressor();
}
}

View File

@@ -1,33 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compressor;
import org.elasticsearch.common.io.compression.Compressor;
import org.elasticsearch.common.io.compression.LzfCompressor;
/**
* @author kimchy (Shay Banon)
*/
public class LzpCompressorTests extends AbstractCompressorTests {
@Override protected Compressor createCompressor() {
return new LzfCompressor();
}
}

View File

@@ -1,33 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.io.compressor;
import org.elasticsearch.common.io.compression.Compressor;
import org.elasticsearch.common.io.compression.ZipCompressor;
/**
* @author kimchy (Shay Banon)
*/
public class ZipCompressorTests extends AbstractCompressorTests {
@Override protected Compressor createCompressor() {
return new ZipCompressor();
}
}