mirror of https://github.com/apache/lucene.git
LUCENE-5201: Fixed compression bug in LZ4.compressHC.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520268 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e69fb35cc2
commit
81563c2f57
|
@ -157,6 +157,10 @@ Bug Fixes
|
|||
The escaping of codepoints > 127 was removed (not needed for valid HTML)
|
||||
and missing escaping for ' and / was added. (Uwe Schindler)
|
||||
|
||||
* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly
|
||||
compressible and the start offset of the array to compress is > 0.
|
||||
(Adrien Grand)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||
|
|
|
@ -295,7 +295,7 @@ final class LZ4 {
|
|||
private int hashPointer(byte[] bytes, int off) {
|
||||
final int v = readInt(bytes, off);
|
||||
final int h = hashHC(v);
|
||||
return base + hashTable[h];
|
||||
return hashTable[h];
|
||||
}
|
||||
|
||||
private int next(int off) {
|
||||
|
@ -306,6 +306,7 @@ final class LZ4 {
|
|||
final int v = readInt(bytes, off);
|
||||
final int h = hashHC(v);
|
||||
int delta = off - hashTable[h];
|
||||
assert delta > 0 : delta;
|
||||
if (delta >= MAX_DISTANCE) {
|
||||
delta = MAX_DISTANCE - 1;
|
||||
}
|
||||
|
|
|
@ -50,15 +50,15 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
return arr;
|
||||
}
|
||||
|
||||
byte[] compress(byte[] decompressed) throws IOException {
|
||||
byte[] compress(byte[] decompressed, int off, int len) throws IOException {
|
||||
Compressor compressor = mode.newCompressor();
|
||||
return compress(compressor, decompressed);
|
||||
return compress(compressor, decompressed, off, len);
|
||||
}
|
||||
|
||||
static byte[] compress(Compressor compressor, byte[] decompressed) throws IOException {
|
||||
byte[] compressed = new byte[decompressed.length * 2 + 16]; // should be enough
|
||||
static byte[] compress(Compressor compressor, byte[] decompressed, int off, int len) throws IOException {
|
||||
byte[] compressed = new byte[len * 2 + 16]; // should be enough
|
||||
ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
|
||||
compressor.compress(decompressed, 0, decompressed.length, out);
|
||||
compressor.compress(decompressed, off, len, out);
|
||||
final int compressedLen = out.getPosition();
|
||||
return Arrays.copyOf(compressed, compressedLen);
|
||||
}
|
||||
|
@ -85,9 +85,11 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
final int iterations = atLeast(10);
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final byte[] decompressed = randomArray();
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] restored = decompress(compressed, decompressed.length);
|
||||
assertArrayEquals(decompressed, restored);
|
||||
final int off = random().nextBoolean() ? 0 : _TestUtil.nextInt(random(), 0, decompressed.length);
|
||||
final int len = random().nextBoolean() ? decompressed.length - off : _TestUtil.nextInt(random(), 0, decompressed.length - off);
|
||||
final byte[] compressed = compress(decompressed, off, len);
|
||||
final byte[] restored = decompress(compressed, len);
|
||||
assertArrayEquals(Arrays.copyOfRange(decompressed, off, off+len), restored);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,7 +97,7 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
final int iterations = atLeast(10);
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final byte[] decompressed = randomArray();
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] compressed = compress(decompressed, 0, decompressed.length);
|
||||
final int offset, length;
|
||||
if (decompressed.length == 0) {
|
||||
offset = length = 0;
|
||||
|
@ -109,9 +111,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public byte[] test(byte[] decompressed) throws IOException {
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] restored = decompress(compressed, decompressed.length);
|
||||
assertEquals(decompressed.length, restored.length);
|
||||
return test(decompressed, 0, decompressed.length);
|
||||
}
|
||||
|
||||
public byte[] test(byte[] decompressed, int off, int len) throws IOException {
|
||||
final byte[] compressed = compress(decompressed, off, len);
|
||||
final byte[] restored = decompress(compressed, len);
|
||||
assertEquals(len, restored.length);
|
||||
return compressed;
|
||||
}
|
||||
|
||||
|
@ -137,4 +143,58 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
test(decompressed);
|
||||
}
|
||||
|
||||
public void testLUCENE5201() throws IOException {
|
||||
byte[] data = new byte[]{
|
||||
14, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 85, 3, 72,
|
||||
14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 50, 64, 0, 46, -1, 0, 0, 0, 29, 3, 85,
|
||||
8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3,
|
||||
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
|
||||
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
|
||||
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 50, 64, 0, 47, -105, 0, 0, 0, 30, 3, -97, 6, 0, 68, -113,
|
||||
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85,
|
||||
8, -113, 0, 68, -97, 3, 0, 2, -97, 6, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
|
||||
6, 0, 68, -113, 0, 120, 64, 0, 48, 4, 0, 0, 0, 31, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72,
|
||||
33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72,
|
||||
43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72,
|
||||
28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
|
||||
35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
|
||||
41, 72, 32, 72, 18, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 39, 24, 32, 34, 124, 0, 120, 64, 0, 48, 80, 0, 0, 0, 31, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
|
||||
35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
|
||||
41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72,
|
||||
40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72,
|
||||
31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72,
|
||||
26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72,
|
||||
37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72,
|
||||
36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72,
|
||||
20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72,
|
||||
22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72,
|
||||
38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72,
|
||||
29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72,
|
||||
27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 50, 64, 0, 49, 20, 0, 0, 0, 32, 3, -97, 6, 0,
|
||||
68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
|
||||
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
|
||||
3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
|
||||
3, -97, 6, 0, 50, 64, 0, 50, 53, 0, 0, 0, 34, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
|
||||
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3,
|
||||
-97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97,
|
||||
3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
|
||||
85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0,
|
||||
2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
|
||||
-97, 6, 0, 50, 64, 0, 51, 85, 0, 0, 0, 36, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
|
||||
6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, -97, 5, 0, 2, 3, 85, 8, -113, 0, 68,
|
||||
-97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0,
|
||||
68, -113, 0, 2, 3, -97, 6, 0, 50, -64, 0, 51, -45, 0, 0, 0, 37, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6,
|
||||
0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
|
||||
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 120, 64, 0, 52, -88, 0, 0,
|
||||
0, 39, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72,
|
||||
13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
|
||||
5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
|
||||
5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
|
||||
5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 72,
|
||||
13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, -19, -24, -101, -35
|
||||
};
|
||||
test(data, 9, data.length - 9);
|
||||
}
|
||||
|
||||
}
|
|
@ -32,8 +32,10 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Repeat;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
|
||||
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -14,6 +14,8 @@ import org.apache.lucene.index.TermsEnum.SeekStatus;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Repeat;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -31,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
|
||||
public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -28,9 +28,9 @@ public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte[] test(byte[] decompressed) throws IOException {
|
||||
final byte[] compressed = super.test(decompressed);
|
||||
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
|
||||
public byte[] test(byte[] decompressed, int off, int len) throws IOException {
|
||||
final byte[] compressed = super.test(decompressed, off, len);
|
||||
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed, off, len);
|
||||
// because of the way this compression mode works, its output is necessarily
|
||||
// no larger than the output of CompressionMode.FAST
|
||||
assertTrue(compressed.length <= compressed2.length);
|
||||
|
|
Loading…
Reference in New Issue