diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6e289dbaeea..c94e0bfe1b8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -156,6 +156,10 @@ Bug Fixes outside BMP because it encoded UTF-16 chars instead of codepoints. The escaping of codepoints > 127 was removed (not needed for valid HTML) and missing escaping for ' and / was added. (Uwe Schindler) + +* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly + compressible and the start offset of the array to compress is > 0. + (Adrien Grand) API Changes diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java index 699b2c47679..d1fc3893bc2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java @@ -295,7 +295,7 @@ final class LZ4 { private int hashPointer(byte[] bytes, int off) { final int v = readInt(bytes, off); final int h = hashHC(v); - return base + hashTable[h]; + return hashTable[h]; } private int next(int off) { @@ -306,6 +306,7 @@ final class LZ4 { final int v = readInt(bytes, off); final int h = hashHC(v); int delta = off - hashTable[h]; + assert delta > 0 : delta; if (delta >= MAX_DISTANCE) { delta = MAX_DISTANCE - 1; } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java index 4fa95b34ed5..33fbfb59e65 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java @@ -50,15 +50,15 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { return arr; } - byte[] compress(byte[] decompressed) throws IOException { + byte[] compress(byte[] decompressed, int off, int len) throws IOException { Compressor compressor = mode.newCompressor(); - return compress(compressor, decompressed); + return compress(compressor, decompressed, off, len); } - static byte[] compress(Compressor compressor, byte[] decompressed) throws IOException { - byte[] compressed = new byte[decompressed.length * 2 + 16]; // should be enough + static byte[] compress(Compressor compressor, byte[] decompressed, int off, int len) throws IOException { + byte[] compressed = new byte[len * 2 + 16]; // should be enough ByteArrayDataOutput out = new ByteArrayDataOutput(compressed); - compressor.compress(decompressed, 0, decompressed.length, out); + compressor.compress(decompressed, off, len, out); final int compressedLen = out.getPosition(); return Arrays.copyOf(compressed, compressedLen); } @@ -85,9 +85,11 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { final int iterations = atLeast(10); for (int i = 0; i < iterations; ++i) { final byte[] decompressed = randomArray(); - final byte[] compressed = compress(decompressed); - final byte[] restored = decompress(compressed, decompressed.length); - assertArrayEquals(decompressed, restored); + final int off = random().nextBoolean() ? 0 : _TestUtil.nextInt(random(), 0, decompressed.length); + final int len = random().nextBoolean() ? decompressed.length - off : _TestUtil.nextInt(random(), 0, decompressed.length - off); + final byte[] compressed = compress(decompressed, off, len); + final byte[] restored = decompress(compressed, len); + assertArrayEquals(Arrays.copyOfRange(decompressed, off, off+len), restored); } } @@ -95,7 +97,7 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { final int iterations = atLeast(10); for (int i = 0; i < iterations; ++i) { final byte[] decompressed = randomArray(); - final byte[] compressed = compress(decompressed); + final byte[] compressed = compress(decompressed, 0, decompressed.length); final int offset, length; if (decompressed.length == 0) { offset = length = 0; @@ -109,9 +111,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { } public byte[] test(byte[] decompressed) throws IOException { - final byte[] compressed = compress(decompressed); - final byte[] restored = decompress(compressed, decompressed.length); - assertEquals(decompressed.length, restored.length); + return test(decompressed, 0, decompressed.length); + } + + public byte[] test(byte[] decompressed, int off, int len) throws IOException { + final byte[] compressed = compress(decompressed, off, len); + final byte[] restored = decompress(compressed, len); + assertEquals(len, restored.length); return compressed; } @@ -137,4 +143,58 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { test(decompressed); } + public void testLUCENE5201() throws IOException { + byte[] data = new byte[]{ + 14, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 85, 3, 72, + 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 50, 64, 0, 46, -1, 0, 0, 0, 29, 3, 85, + 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, + 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, + 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, + 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 50, 64, 0, 47, -105, 0, 0, 0, 30, 3, -97, 6, 0, 68, -113, + 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, + 8, -113, 0, 68, -97, 3, 0, 2, -97, 6, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, + 6, 0, 68, -113, 0, 120, 64, 0, 48, 4, 0, 0, 0, 31, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, + 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, + 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, + 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, + 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, + 41, 72, 32, 72, 18, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 39, 24, 32, 34, 124, 0, 120, 64, 0, 48, 80, 0, 0, 0, 31, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, + 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, + 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, + 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, + 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, + 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, + 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, + 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, + 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, + 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, + 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, + 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, + 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 50, 64, 0, 49, 20, 0, 0, 0, 32, 3, -97, 6, 0, + 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, + 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, + 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, + 3, -97, 6, 0, 50, 64, 0, 50, 53, 0, 0, 0, 34, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, + 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, + -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, + 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, + 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, + 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, + -97, 6, 0, 50, 64, 0, 51, 85, 0, 0, 0, 36, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, + 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, -97, 5, 0, 2, 3, 85, 8, -113, 0, 68, + -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, + 68, -113, 0, 2, 3, -97, 6, 0, 50, -64, 0, 51, -45, 0, 0, 0, 37, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, + 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, + 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 120, 64, 0, 52, -88, 0, 0, + 0, 39, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, + 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, + 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, + 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, + 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 72, + 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, -19, -24, -101, -35 + }; + test(data, 9, data.length - 9); + } + } \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java index 157b353a974..f55ea6c0754 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java @@ -32,8 +32,10 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; import org.junit.Test; +import com.carrotsearch.randomizedtesting.annotations.Repeat; import com.carrotsearch.randomizedtesting.generators.RandomInts; +@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase { @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java index 7294315a26a..70fc572bccb 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java @@ -14,6 +14,8 @@ import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import com.carrotsearch.randomizedtesting.annotations.Repeat; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -31,6 +33,7 @@ import org.apache.lucene.util.BytesRef; * limitations under the License. */ +@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase { @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java index 9ed7648d8cc..de14013d457 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java @@ -28,9 +28,9 @@ public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode { } @Override - public byte[] test(byte[] decompressed) throws IOException { - final byte[] compressed = super.test(decompressed); - final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed); + public byte[] test(byte[] decompressed, int off, int len) throws IOException { + final byte[] compressed = super.test(decompressed, off, len); + final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed, off, len); // because of the way this compression mode works, its output is necessarily // smaller than the output of CompressionMode.FAST assertTrue(compressed.length <= compressed2.length);