mirror of https://github.com/apache/lucene.git
LUCENE-4708: Reuse LZ4 hash tables across calls.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1438519 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
735995f22e
commit
87efa12ae8
|
@ -45,7 +45,7 @@ public abstract class CompressionMode {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Compressor newCompressor() {
|
public Compressor newCompressor() {
|
||||||
return LZ4_FAST_COMPRESSOR;
|
return new LZ4FastCompressor();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -95,7 +95,7 @@ public abstract class CompressionMode {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Compressor newCompressor() {
|
public Compressor newCompressor() {
|
||||||
return LZ4_HIGH_COMPRESSOR;
|
return new LZ4HighCompressor();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -147,25 +147,37 @@ public abstract class CompressionMode {
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
private static final Compressor LZ4_FAST_COMPRESSOR = new Compressor() {
|
private static final class LZ4FastCompressor extends Compressor {
|
||||||
|
|
||||||
|
private final LZ4.HashTable ht;
|
||||||
|
|
||||||
|
LZ4FastCompressor() {
|
||||||
|
ht = new LZ4.HashTable();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compress(byte[] bytes, int off, int len, DataOutput out)
|
public void compress(byte[] bytes, int off, int len, DataOutput out)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LZ4.compress(bytes, off, len, out);
|
LZ4.compress(bytes, off, len, out, ht);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
}
|
||||||
|
|
||||||
private static final Compressor LZ4_HIGH_COMPRESSOR = new Compressor() {
|
private static final class LZ4HighCompressor extends Compressor {
|
||||||
|
|
||||||
|
private final LZ4.HCHashTable ht;
|
||||||
|
|
||||||
|
LZ4HighCompressor() {
|
||||||
|
ht = new LZ4.HCHashTable();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compress(byte[] bytes, int off, int len, DataOutput out)
|
public void compress(byte[] bytes, int off, int len, DataOutput out)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LZ4.compressHC(bytes, off, len, out);
|
LZ4.compressHC(bytes, off, len, out, ht);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
}
|
||||||
|
|
||||||
private static final class DeflateDecompressor extends Decompressor {
|
private static final class DeflateDecompressor extends Decompressor {
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* http://code.google.com/p/lz4/
|
* http://code.google.com/p/lz4/
|
||||||
* http://fastcompression.blogspot.fr/p/lz4.html
|
* http://fastcompression.blogspot.fr/p/lz4.html
|
||||||
*/
|
*/
|
||||||
class LZ4 {
|
final class LZ4 {
|
||||||
|
|
||||||
private LZ4() {}
|
private LZ4() {}
|
||||||
|
|
||||||
|
@ -181,11 +181,29 @@ class LZ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final class HashTable {
|
||||||
|
private int hashLog;
|
||||||
|
private PackedInts.Mutable hashTable;
|
||||||
|
|
||||||
|
void reset(int len) {
|
||||||
|
final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
|
||||||
|
final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
|
||||||
|
hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
|
||||||
|
if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
|
||||||
|
hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
|
||||||
|
} else {
|
||||||
|
hashTable.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compress <code>bytes[off:off+len]</code> into <code>out</code> using
|
* Compress <code>bytes[off:off+len]</code> into <code>out</code> using
|
||||||
* at most 16KB of memory.
|
* at most 16KB of memory. <code>ht</code> shouldn't be shared across threads
|
||||||
|
* but can safely be reused.
|
||||||
*/
|
*/
|
||||||
public static void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
|
public static void compress(byte[] bytes, int off, int len, DataOutput out, HashTable ht) throws IOException {
|
||||||
|
|
||||||
final int base = off;
|
final int base = off;
|
||||||
final int end = off + len;
|
final int end = off + len;
|
||||||
|
@ -196,11 +214,9 @@ class LZ4 {
|
||||||
|
|
||||||
final int limit = end - LAST_LITERALS;
|
final int limit = end - LAST_LITERALS;
|
||||||
final int matchLimit = limit - MIN_MATCH;
|
final int matchLimit = limit - MIN_MATCH;
|
||||||
|
ht.reset(len);
|
||||||
final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
|
final int hashLog = ht.hashLog;
|
||||||
final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
|
final PackedInts.Mutable hashTable = ht.hashTable;
|
||||||
final int hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
|
|
||||||
final PackedInts.Mutable hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
|
|
||||||
|
|
||||||
main:
|
main:
|
||||||
while (off < limit) {
|
while (off < limit) {
|
||||||
|
@ -256,20 +272,24 @@ class LZ4 {
|
||||||
m2.ref = m1.ref;
|
m2.ref = m1.ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class HashTable {
|
static final class HCHashTable {
|
||||||
static final int MAX_ATTEMPTS = 256;
|
static final int MAX_ATTEMPTS = 256;
|
||||||
static final int MASK = MAX_DISTANCE - 1;
|
static final int MASK = MAX_DISTANCE - 1;
|
||||||
int nextToUpdate;
|
int nextToUpdate;
|
||||||
private final int base;
|
private int base;
|
||||||
private final int[] hashTable;
|
private final int[] hashTable;
|
||||||
private final short[] chainTable;
|
private final short[] chainTable;
|
||||||
|
|
||||||
HashTable(int base) {
|
HCHashTable() {
|
||||||
|
hashTable = new int[HASH_TABLE_SIZE_HC];
|
||||||
|
chainTable = new short[MAX_DISTANCE];
|
||||||
|
}
|
||||||
|
|
||||||
|
private void reset(int base) {
|
||||||
this.base = base;
|
this.base = base;
|
||||||
nextToUpdate = base;
|
nextToUpdate = base;
|
||||||
hashTable = new int[HASH_TABLE_SIZE_HC];
|
|
||||||
Arrays.fill(hashTable, -1);
|
Arrays.fill(hashTable, -1);
|
||||||
chainTable = new short[MAX_DISTANCE];
|
Arrays.fill(chainTable, (short) 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int hashPointer(byte[] bytes, int off) {
|
private int hashPointer(byte[] bytes, int off) {
|
||||||
|
@ -355,12 +375,14 @@ class LZ4 {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compress <code>bytes[off:off+len]</code> into <code>out</code>. Compared to
|
* Compress <code>bytes[off:off+len]</code> into <code>out</code>. Compared to
|
||||||
* {@link LZ4#compress(byte[], int, int, DataOutput)}, this method is slower,
|
* {@link LZ4#compress(byte[], int, int, DataOutput, HashTable)}, this method
|
||||||
* uses more memory (~ 256KB), but should provide better compression ratios
|
* is slower and uses more memory (~ 256KB per thread) but should provide
|
||||||
* (especially on large inputs) because it chooses the best match among up to
|
* better compression ratios (especially on large inputs) because it chooses
|
||||||
* 256 candidates and then performs trade-offs to fix overlapping matches.
|
* the best match among up to 256 candidates and then performs trade-offs to
|
||||||
|
* fix overlapping matches. <code>ht</code> shouldn't be shared across threads
|
||||||
|
* but can safely be reused.
|
||||||
*/
|
*/
|
||||||
public static void compressHC(byte[] src, int srcOff, int srcLen, DataOutput out) throws IOException {
|
public static void compressHC(byte[] src, int srcOff, int srcLen, DataOutput out, HCHashTable ht) throws IOException {
|
||||||
|
|
||||||
final int srcEnd = srcOff + srcLen;
|
final int srcEnd = srcOff + srcLen;
|
||||||
final int matchLimit = srcEnd - LAST_LITERALS;
|
final int matchLimit = srcEnd - LAST_LITERALS;
|
||||||
|
@ -368,7 +390,7 @@ class LZ4 {
|
||||||
int sOff = srcOff;
|
int sOff = srcOff;
|
||||||
int anchor = sOff++;
|
int anchor = sOff++;
|
||||||
|
|
||||||
final HashTable ht = new HashTable(srcOff);
|
ht.reset(srcOff);
|
||||||
final Match match0 = new Match();
|
final Match match0 = new Match();
|
||||||
final Match match1 = new Match();
|
final Match match1 = new Match();
|
||||||
final Match match2 = new Match();
|
final Match match2 = new Match();
|
||||||
|
|
Loading…
Reference in New Issue