LUCENE-5201: Fixed compression bug in LZ4.compressHC.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520268 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-09-05 10:08:55 +00:00
parent e69fb35cc2
commit 81563c2f57
6 changed files with 86 additions and 16 deletions

View File

@ -157,6 +157,10 @@ Bug Fixes
The escaping of codepoints > 127 was removed (not needed for valid HTML)
and missing escaping for ' and / was added. (Uwe Schindler)
* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly
compressible and the start offset of the array to compress is > 0.
(Adrien Grand)
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

View File

@ -295,7 +295,7 @@ final class LZ4 {
private int hashPointer(byte[] bytes, int off) {
final int v = readInt(bytes, off);
final int h = hashHC(v);
return base + hashTable[h];
return hashTable[h];
}
private int next(int off) {
@ -306,6 +306,7 @@ final class LZ4 {
final int v = readInt(bytes, off);
final int h = hashHC(v);
int delta = off - hashTable[h];
assert delta > 0 : delta;
if (delta >= MAX_DISTANCE) {
delta = MAX_DISTANCE - 1;
}

View File

@ -50,15 +50,15 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
return arr;
}
byte[] compress(byte[] decompressed) throws IOException {
byte[] compress(byte[] decompressed, int off, int len) throws IOException {
Compressor compressor = mode.newCompressor();
return compress(compressor, decompressed);
return compress(compressor, decompressed, off, len);
}
static byte[] compress(Compressor compressor, byte[] decompressed) throws IOException {
byte[] compressed = new byte[decompressed.length * 2 + 16]; // should be enough
static byte[] compress(Compressor compressor, byte[] decompressed, int off, int len) throws IOException {
byte[] compressed = new byte[len * 2 + 16]; // should be enough
ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
compressor.compress(decompressed, 0, decompressed.length, out);
compressor.compress(decompressed, off, len, out);
final int compressedLen = out.getPosition();
return Arrays.copyOf(compressed, compressedLen);
}
@ -85,9 +85,11 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] decompressed = randomArray();
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertArrayEquals(decompressed, restored);
final int off = random().nextBoolean() ? 0 : _TestUtil.nextInt(random(), 0, decompressed.length);
final int len = random().nextBoolean() ? decompressed.length - off : _TestUtil.nextInt(random(), 0, decompressed.length - off);
final byte[] compressed = compress(decompressed, off, len);
final byte[] restored = decompress(compressed, len);
assertArrayEquals(Arrays.copyOfRange(decompressed, off, off+len), restored);
}
}
@ -95,7 +97,7 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] decompressed = randomArray();
final byte[] compressed = compress(decompressed);
final byte[] compressed = compress(decompressed, 0, decompressed.length);
final int offset, length;
if (decompressed.length == 0) {
offset = length = 0;
@ -109,9 +111,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
}
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertEquals(decompressed.length, restored.length);
return test(decompressed, 0, decompressed.length);
}
public byte[] test(byte[] decompressed, int off, int len) throws IOException {
final byte[] compressed = compress(decompressed, off, len);
final byte[] restored = decompress(compressed, len);
assertEquals(len, restored.length);
return compressed;
}
@ -137,4 +143,58 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
test(decompressed);
}
public void testLUCENE5201() throws IOException {
byte[] data = new byte[]{
14, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 85, 3, 72,
14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 50, 64, 0, 46, -1, 0, 0, 0, 29, 3, 85,
8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3,
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 50, 64, 0, 47, -105, 0, 0, 0, 30, 3, -97, 6, 0, 68, -113,
0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85,
8, -113, 0, 68, -97, 3, 0, 2, -97, 6, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
6, 0, 68, -113, 0, 120, 64, 0, 48, 4, 0, 0, 0, 31, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72,
33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72,
43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72,
28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
41, 72, 32, 72, 18, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 39, 24, 32, 34, 124, 0, 120, 64, 0, 48, 80, 0, 0, 0, 31, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72,
40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72,
31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72,
26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72,
37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72,
36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72,
20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72,
22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72,
38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72,
29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72,
27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 50, 64, 0, 49, 20, 0, 0, 0, 32, 3, -97, 6, 0,
68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
3, -97, 6, 0, 50, 64, 0, 50, 53, 0, 0, 0, 34, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3,
-97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97,
3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0,
2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
-97, 6, 0, 50, 64, 0, 51, 85, 0, 0, 0, 36, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, -97, 5, 0, 2, 3, 85, 8, -113, 0, 68,
-97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0,
68, -113, 0, 2, 3, -97, 6, 0, 50, -64, 0, 51, -45, 0, 0, 0, 37, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6,
0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 120, 64, 0, 52, -88, 0, 0,
0, 39, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72,
13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 72,
13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, -19, -24, -101, -35
};
test(data, 9, data.length - 9);
}
}

View File

@ -32,8 +32,10 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
@Override

View File

@ -14,6 +14,8 @@ import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -31,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
* limitations under the License.
*/
@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase {
@Override

View File

@ -28,9 +28,9 @@ public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode {
}
@Override
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = super.test(decompressed);
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
public byte[] test(byte[] decompressed, int off, int len) throws IOException {
final byte[] compressed = super.test(decompressed, off, len);
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed, off, len);
// because of the way this compression mode works, its output is necessarily
// smaller than the output of CompressionMode.FAST
assertTrue(compressed.length <= compressed2.length);