More LZ4 tests.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417227 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2012-12-04 22:38:01 +00:00
parent efb8b8e62e
commit 7c394f10d9
5 changed files with 130 additions and 36 deletions

View File

@ -121,7 +121,7 @@ class LZ4 {
matchLen += MIN_MATCH;
// copying a multiple of 8 bytes can make decompression from 5% to 10% faster
final int fastLen = ((matchLen - 1) & 0xFFFFFFF8) + 8;
final int fastLen = (matchLen + 7) & 0xFFFFFFF8;
if (matchDec < matchLen || dOff + fastLen > destEnd) {
// overlap -> naive incremental copy
for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) {
@ -222,7 +222,7 @@ class LZ4 {
}
// compute match length
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + 4, off + 4, limit);
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + MIN_MATCH, off + MIN_MATCH, limit);
encodeSequence(bytes, anchor, ref, off, matchLen, out);
off += matchLen;

View File

@ -91,10 +91,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
}
public void testDecompress() throws IOException {
final byte[] decompressed = randomArray();
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertArrayEquals(decompressed, restored);
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] decompressed = randomArray();
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertArrayEquals(decompressed, restored);
}
}
public void testPartialDecompress() throws IOException {
@ -120,11 +123,12 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
}
public void test(byte[] decompressed) throws IOException {
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertEquals(decompressed.length, restored.length);
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
return compressed;
}
public void testEmptySequence() throws IOException {
@ -143,31 +147,4 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
test(decompressed);
}
// for LZ compression
public void testShortLiteralsAndMatchs() throws IOException {
// literals and matchs lengths <= 15
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
test(decompressed);
}
public void testLongMatchs() throws IOException {
// match length > 16
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
for (int i = 0; i < decompressed.length; ++i) {
decompressed[i] = (byte) i;
}
test(decompressed);
}
public void testLongLiterals() throws IOException {
// long literals (length > 16) which are not the last literals
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
final int matchRef = random().nextInt(30);
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
test(decompressed);
}
}

View File

@ -0,0 +1,106 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompressionMode {
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = super.test(decompressed);
int off = 0;
int decompressedOff = 0;
for (;;) {
final int token = compressed[off++] & 0xFF;
int literalLen = token >>> 4;
if (literalLen == 0x0F) {
while (compressed[off] == (byte) 0xFF) {
literalLen += 0xFF;
++off;
}
literalLen += compressed[off++] & 0xFF;
}
// skip literals
off += literalLen;
decompressedOff += literalLen;
// check that the stream ends with literals and that there are at least
// 5 of them
if (off == compressed.length) {
assertEquals(decompressed.length, decompressedOff);
assertTrue("lastLiterals=" + literalLen + ", bytes=" + decompressed.length,
literalLen >= LZ4.LAST_LITERALS || literalLen == decompressed.length);
break;
}
final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
// check that match dec is not 0
assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);
int matchLen = token & 0x0F;
if (matchLen == 0x0F) {
while (compressed[off] == (byte) 0xFF) {
matchLen += 0xFF;
++off;
}
matchLen += compressed[off++] & 0xFF;
}
matchLen += LZ4.MIN_MATCH;
// if the match ends prematurely, the next sequence should not have
// literals or this means we are wasting space
if (decompressedOff + matchLen < decompressed.length - LZ4.LAST_LITERALS) {
final boolean moreCommonBytes = decompressed[decompressedOff + matchLen] == decompressed[decompressedOff - matchDec + matchLen];
final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
assertTrue(!moreCommonBytes || !nextSequenceHasLiterals);
}
decompressedOff += matchLen;
}
assertEquals(decompressed.length, decompressedOff);
return compressed;
}
public void testShortLiteralsAndMatchs() throws IOException {
// literals and matchs lengths <= 15
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
test(decompressed);
}
public void testLongMatchs() throws IOException {
// match length >= 20
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
for (int i = 0; i < decompressed.length; ++i) {
decompressed[i] = (byte) i;
}
test(decompressed);
}
public void testLongLiterals() throws IOException {
// long literals (length >= 16) which are not the last literals
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
final int matchRef = random().nextInt(30);
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
test(decompressed);
}
}

View File

@ -17,7 +17,7 @@ package org.apache.lucene.codecs.compressing;
* limitations under the License.
*/
public class TestFastCompressionMode extends AbstractTestCompressionMode {
public class TestFastCompressionMode extends AbstractTestLZ4CompressionMode {
public void setUp() throws Exception {
super.setUp();

View File

@ -17,12 +17,23 @@ package org.apache.lucene.codecs.compressing;
* limitations under the License.
*/
import java.io.IOException;
public class TestFastDecompressionMode extends AbstractTestCompressionMode {
public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode {
public void setUp() throws Exception {
super.setUp();
mode = CompressionMode.FAST_DECOMPRESSION;
}
@Override
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = super.test(decompressed);
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
// because of the way this compression mode works, its output is necessarily
// smaller than the output of CompressionMode.FAST
assertTrue(compressed.length <= compressed2.length);
return compressed;
}
}