mirror of https://github.com/apache/lucene.git
More LZ4 tests.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417227 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
efb8b8e62e
commit
7c394f10d9
|
@ -121,7 +121,7 @@ class LZ4 {
|
|||
matchLen += MIN_MATCH;
|
||||
|
||||
// copying a multiple of 8 bytes can make decompression from 5% to 10% faster
|
||||
final int fastLen = ((matchLen - 1) & 0xFFFFFFF8) + 8;
|
||||
final int fastLen = (matchLen + 7) & 0xFFFFFFF8;
|
||||
if (matchDec < matchLen || dOff + fastLen > destEnd) {
|
||||
// overlap -> naive incremental copy
|
||||
for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) {
|
||||
|
@ -222,7 +222,7 @@ class LZ4 {
|
|||
}
|
||||
|
||||
// compute match length
|
||||
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + 4, off + 4, limit);
|
||||
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + MIN_MATCH, off + MIN_MATCH, limit);
|
||||
|
||||
encodeSequence(bytes, anchor, ref, off, matchLen, out);
|
||||
off += matchLen;
|
||||
|
|
|
@ -91,10 +91,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testDecompress() throws IOException {
|
||||
final byte[] decompressed = randomArray();
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] restored = decompress(compressed, decompressed.length);
|
||||
assertArrayEquals(decompressed, restored);
|
||||
final int iterations = atLeast(10);
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final byte[] decompressed = randomArray();
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] restored = decompress(compressed, decompressed.length);
|
||||
assertArrayEquals(decompressed, restored);
|
||||
}
|
||||
}
|
||||
|
||||
public void testPartialDecompress() throws IOException {
|
||||
|
@ -120,11 +123,12 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
||||
}
|
||||
|
||||
public void test(byte[] decompressed) throws IOException {
|
||||
public byte[] test(byte[] decompressed) throws IOException {
|
||||
final byte[] compressed = compress(decompressed);
|
||||
final byte[] restored = decompress(compressed, decompressed.length);
|
||||
assertEquals(decompressed.length, restored.length);
|
||||
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
||||
return compressed;
|
||||
}
|
||||
|
||||
public void testEmptySequence() throws IOException {
|
||||
|
@ -143,31 +147,4 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
|||
test(decompressed);
|
||||
}
|
||||
|
||||
// for LZ compression
|
||||
|
||||
public void testShortLiteralsAndMatchs() throws IOException {
|
||||
// literals and matchs lengths <= 15
|
||||
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
public void testLongMatchs() throws IOException {
|
||||
// match length > 16
|
||||
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
|
||||
for (int i = 0; i < decompressed.length; ++i) {
|
||||
decompressed[i] = (byte) i;
|
||||
}
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
public void testLongLiterals() throws IOException {
|
||||
// long literals (length > 16) which are not the last literals
|
||||
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
|
||||
final int matchRef = random().nextInt(30);
|
||||
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
|
||||
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
|
||||
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package org.apache.lucene.codecs.compressing;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompressionMode {
|
||||
|
||||
public byte[] test(byte[] decompressed) throws IOException {
|
||||
final byte[] compressed = super.test(decompressed);
|
||||
int off = 0;
|
||||
int decompressedOff = 0;
|
||||
for (;;) {
|
||||
final int token = compressed[off++] & 0xFF;
|
||||
int literalLen = token >>> 4;
|
||||
if (literalLen == 0x0F) {
|
||||
while (compressed[off] == (byte) 0xFF) {
|
||||
literalLen += 0xFF;
|
||||
++off;
|
||||
}
|
||||
literalLen += compressed[off++] & 0xFF;
|
||||
}
|
||||
// skip literals
|
||||
off += literalLen;
|
||||
decompressedOff += literalLen;
|
||||
|
||||
// check that the stream ends with literals and that there are at least
|
||||
// 5 of them
|
||||
if (off == compressed.length) {
|
||||
assertEquals(decompressed.length, decompressedOff);
|
||||
assertTrue("lastLiterals=" + literalLen + ", bytes=" + decompressed.length,
|
||||
literalLen >= LZ4.LAST_LITERALS || literalLen == decompressed.length);
|
||||
break;
|
||||
}
|
||||
|
||||
final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
|
||||
// check that match dec is not 0
|
||||
assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);
|
||||
|
||||
int matchLen = token & 0x0F;
|
||||
if (matchLen == 0x0F) {
|
||||
while (compressed[off] == (byte) 0xFF) {
|
||||
matchLen += 0xFF;
|
||||
++off;
|
||||
}
|
||||
matchLen += compressed[off++] & 0xFF;
|
||||
}
|
||||
matchLen += LZ4.MIN_MATCH;
|
||||
|
||||
// if the match ends prematurely, the next sequence should not have
|
||||
// literals or this means we are wasting space
|
||||
if (decompressedOff + matchLen < decompressed.length - LZ4.LAST_LITERALS) {
|
||||
final boolean moreCommonBytes = decompressed[decompressedOff + matchLen] == decompressed[decompressedOff - matchDec + matchLen];
|
||||
final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
|
||||
assertTrue(!moreCommonBytes || !nextSequenceHasLiterals);
|
||||
}
|
||||
|
||||
decompressedOff += matchLen;
|
||||
}
|
||||
assertEquals(decompressed.length, decompressedOff);
|
||||
return compressed;
|
||||
}
|
||||
|
||||
public void testShortLiteralsAndMatchs() throws IOException {
|
||||
// literals and matchs lengths <= 15
|
||||
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
public void testLongMatchs() throws IOException {
|
||||
// match length >= 20
|
||||
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
|
||||
for (int i = 0; i < decompressed.length; ++i) {
|
||||
decompressed[i] = (byte) i;
|
||||
}
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
public void testLongLiterals() throws IOException {
|
||||
// long literals (length >= 16) which are not the last literals
|
||||
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
|
||||
final int matchRef = random().nextInt(30);
|
||||
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
|
||||
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
|
||||
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
|
||||
test(decompressed);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.codecs.compressing;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestFastCompressionMode extends AbstractTestCompressionMode {
|
||||
public class TestFastCompressionMode extends AbstractTestLZ4CompressionMode {
|
||||
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
|
|
@ -17,12 +17,23 @@ package org.apache.lucene.codecs.compressing;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TestFastDecompressionMode extends AbstractTestCompressionMode {
|
||||
public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode {
|
||||
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
mode = CompressionMode.FAST_DECOMPRESSION;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] test(byte[] decompressed) throws IOException {
|
||||
final byte[] compressed = super.test(decompressed);
|
||||
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
|
||||
// because of the way this compression mode works, its output is necessarily
|
||||
// smaller than the output of CompressionMode.FAST
|
||||
assertTrue(compressed.length <= compressed2.length);
|
||||
return compressed;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue