From 7c394f10d97eb051a16dd08d73a247d8164f4256 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 4 Dec 2012 22:38:01 +0000 Subject: [PATCH] More LZ4 tests. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417227 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/codecs/compressing/LZ4.java | 4 +- .../AbstractTestCompressionMode.java | 41 ++----- .../AbstractTestLZ4CompressionMode.java | 106 ++++++++++++++++++ .../compressing/TestFastCompressionMode.java | 2 +- .../TestFastDecompressionMode.java | 13 ++- 5 files changed, 130 insertions(+), 36 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java index 359ecb5741c..8807018fb75 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java @@ -121,7 +121,7 @@ class LZ4 { matchLen += MIN_MATCH; // copying a multiple of 8 bytes can make decompression from 5% to 10% faster - final int fastLen = ((matchLen - 1) & 0xFFFFFFF8) + 8; + final int fastLen = (matchLen + 7) & 0xFFFFFFF8; if (matchDec < matchLen || dOff + fastLen > destEnd) { // overlap -> naive incremental copy for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) { @@ -222,7 +222,7 @@ class LZ4 { } // compute match length - final int matchLen = MIN_MATCH + commonBytes(bytes, ref + 4, off + 4, limit); + final int matchLen = MIN_MATCH + commonBytes(bytes, ref + MIN_MATCH, off + MIN_MATCH, limit); encodeSequence(bytes, anchor, ref, off, matchLen, out); off += matchLen; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java index b12ff47cbba..7b25839179f 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java @@ -91,10 +91,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { } public void testDecompress() throws IOException { - final byte[] decompressed = randomArray(); - final byte[] compressed = compress(decompressed); - final byte[] restored = decompress(compressed, decompressed.length); - assertArrayEquals(decompressed, restored); + final int iterations = atLeast(10); + for (int i = 0; i < iterations; ++i) { + final byte[] decompressed = randomArray(); + final byte[] compressed = compress(decompressed); + final byte[] restored = decompress(compressed, decompressed.length); + assertArrayEquals(decompressed, restored); + } } public void testPartialDecompress() throws IOException { @@ -120,11 +123,12 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length)); } - public void test(byte[] decompressed) throws IOException { + public byte[] test(byte[] decompressed) throws IOException { final byte[] compressed = compress(decompressed); final byte[] restored = decompress(compressed, decompressed.length); assertEquals(decompressed.length, restored.length); assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length)); + return compressed; } public void testEmptySequence() throws IOException { @@ -143,31 +147,4 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { test(decompressed); } - // for LZ compression - - public void testShortLiteralsAndMatchs() throws IOException { - // literals and matchs lengths <= 15 - final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8"); - test(decompressed); - } - - public void testLongMatchs() throws IOException { - // match length > 16 - final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)]; - for (int i = 0; i < decompressed.length; ++i) { - decompressed[i] = (byte) i; - } - test(decompressed); - } - - public void testLongLiterals() throws IOException { - // long literals (length > 16) which are not the last literals - final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256); - final int matchRef = random().nextInt(30); - final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20); - final int matchLength = RandomInts.randomIntBetween(random(), 4, 10); - System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength); - test(decompressed); - } - } \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java new file mode 100644 index 00000000000..9576e5727e1 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java @@ -0,0 +1,106 @@ +package org.apache.lucene.codecs.compressing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import com.carrotsearch.randomizedtesting.generators.RandomInts; + +public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompressionMode { + + public byte[] test(byte[] decompressed) throws IOException { + final byte[] compressed = super.test(decompressed); + int off = 0; + int decompressedOff = 0; + for (;;) { + final int token = compressed[off++] & 0xFF; + int literalLen = token >>> 4; + if (literalLen == 0x0F) { + while (compressed[off] == (byte) 0xFF) { + literalLen += 0xFF; + ++off; + } + literalLen += compressed[off++] & 0xFF; + } + // skip literals + off += literalLen; + decompressedOff += literalLen; + + // check that the stream ends with literals and that there are at least + // 5 of them + if (off == compressed.length) { + assertEquals(decompressed.length, decompressedOff); + assertTrue("lastLiterals=" + literalLen + ", bytes=" + decompressed.length, + literalLen >= LZ4.LAST_LITERALS || literalLen == decompressed.length); + break; + } + + final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8); + // check that match dec is not 0 + assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff); + + int matchLen = token & 0x0F; + if (matchLen == 0x0F) { + while (compressed[off] == (byte) 0xFF) { + matchLen += 0xFF; + ++off; + } + matchLen += compressed[off++] & 0xFF; + } + matchLen += LZ4.MIN_MATCH; + + // if the match ends prematurely, the next sequence should not have + // literals or this means we are wasting space + if (decompressedOff + matchLen < decompressed.length - LZ4.LAST_LITERALS) { + final boolean moreCommonBytes = decompressed[decompressedOff + matchLen] == decompressed[decompressedOff - matchDec + matchLen]; + final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0; + assertTrue(!moreCommonBytes || !nextSequenceHasLiterals); + } + + decompressedOff += matchLen; + } + assertEquals(decompressed.length, decompressedOff); + return compressed; + } + + public void testShortLiteralsAndMatchs() throws IOException { + // literals and matchs lengths <= 15 + final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8"); + test(decompressed); + } + + public void testLongMatchs() throws IOException { + // match length >= 20 + final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)]; + for (int i = 0; i < decompressed.length; ++i) { + decompressed[i] = (byte) i; + } + test(decompressed); + } + + public void testLongLiterals() throws IOException { + // long literals (length >= 16) which are not the last literals + final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256); + final int matchRef = random().nextInt(30); + final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20); + final int matchLength = RandomInts.randomIntBetween(random(), 4, 10); + System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength); + test(decompressed); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java index addfa60d6c1..deb867af19d 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java @@ -17,7 +17,7 @@ package org.apache.lucene.codecs.compressing; * limitations under the License. */ -public class TestFastCompressionMode extends AbstractTestCompressionMode { +public class TestFastCompressionMode extends AbstractTestLZ4CompressionMode { public void setUp() throws Exception { super.setUp(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java index f5250927cd4..c1bbb59fa6a 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java @@ -17,12 +17,23 @@ package org.apache.lucene.codecs.compressing; * limitations under the License. */ +import java.io.IOException; -public class TestFastDecompressionMode extends AbstractTestCompressionMode { +public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode { public void setUp() throws Exception { super.setUp(); mode = CompressionMode.FAST_DECOMPRESSION; } + @Override + public byte[] test(byte[] decompressed) throws IOException { + final byte[] compressed = super.test(decompressed); + final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed); + // because of the way this compression mode works, its output is necessarily + // smaller than the output of CompressionMode.FAST + assertTrue(compressed.length <= compressed2.length); + return compressed; + } + }