mirror of https://github.com/apache/lucene.git
More LZ4 tests.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417227 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
efb8b8e62e
commit
7c394f10d9
|
@ -121,7 +121,7 @@ class LZ4 {
|
||||||
matchLen += MIN_MATCH;
|
matchLen += MIN_MATCH;
|
||||||
|
|
||||||
// copying a multiple of 8 bytes can make decompression from 5% to 10% faster
|
// copying a multiple of 8 bytes can make decompression from 5% to 10% faster
|
||||||
final int fastLen = ((matchLen - 1) & 0xFFFFFFF8) + 8;
|
final int fastLen = (matchLen + 7) & 0xFFFFFFF8;
|
||||||
if (matchDec < matchLen || dOff + fastLen > destEnd) {
|
if (matchDec < matchLen || dOff + fastLen > destEnd) {
|
||||||
// overlap -> naive incremental copy
|
// overlap -> naive incremental copy
|
||||||
for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) {
|
for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) {
|
||||||
|
@ -222,7 +222,7 @@ class LZ4 {
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute match length
|
// compute match length
|
||||||
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + 4, off + 4, limit);
|
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + MIN_MATCH, off + MIN_MATCH, limit);
|
||||||
|
|
||||||
encodeSequence(bytes, anchor, ref, off, matchLen, out);
|
encodeSequence(bytes, anchor, ref, off, matchLen, out);
|
||||||
off += matchLen;
|
off += matchLen;
|
||||||
|
|
|
@ -91,10 +91,13 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDecompress() throws IOException {
|
public void testDecompress() throws IOException {
|
||||||
final byte[] decompressed = randomArray();
|
final int iterations = atLeast(10);
|
||||||
final byte[] compressed = compress(decompressed);
|
for (int i = 0; i < iterations; ++i) {
|
||||||
final byte[] restored = decompress(compressed, decompressed.length);
|
final byte[] decompressed = randomArray();
|
||||||
assertArrayEquals(decompressed, restored);
|
final byte[] compressed = compress(decompressed);
|
||||||
|
final byte[] restored = decompress(compressed, decompressed.length);
|
||||||
|
assertArrayEquals(decompressed, restored);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPartialDecompress() throws IOException {
|
public void testPartialDecompress() throws IOException {
|
||||||
|
@ -120,11 +123,12 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
||||||
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test(byte[] decompressed) throws IOException {
|
public byte[] test(byte[] decompressed) throws IOException {
|
||||||
final byte[] compressed = compress(decompressed);
|
final byte[] compressed = compress(decompressed);
|
||||||
final byte[] restored = decompress(compressed, decompressed.length);
|
final byte[] restored = decompress(compressed, decompressed.length);
|
||||||
assertEquals(decompressed.length, restored.length);
|
assertEquals(decompressed.length, restored.length);
|
||||||
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
|
||||||
|
return compressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEmptySequence() throws IOException {
|
public void testEmptySequence() throws IOException {
|
||||||
|
@ -143,31 +147,4 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase {
|
||||||
test(decompressed);
|
test(decompressed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for LZ compression
|
|
||||||
|
|
||||||
public void testShortLiteralsAndMatchs() throws IOException {
|
|
||||||
// literals and matchs lengths <= 15
|
|
||||||
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
|
|
||||||
test(decompressed);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testLongMatchs() throws IOException {
|
|
||||||
// match length > 16
|
|
||||||
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
|
|
||||||
for (int i = 0; i < decompressed.length; ++i) {
|
|
||||||
decompressed[i] = (byte) i;
|
|
||||||
}
|
|
||||||
test(decompressed);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testLongLiterals() throws IOException {
|
|
||||||
// long literals (length > 16) which are not the last literals
|
|
||||||
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
|
|
||||||
final int matchRef = random().nextInt(30);
|
|
||||||
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
|
|
||||||
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
|
|
||||||
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
|
|
||||||
test(decompressed);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||||
|
|
||||||
|
public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompressionMode {
|
||||||
|
|
||||||
|
public byte[] test(byte[] decompressed) throws IOException {
|
||||||
|
final byte[] compressed = super.test(decompressed);
|
||||||
|
int off = 0;
|
||||||
|
int decompressedOff = 0;
|
||||||
|
for (;;) {
|
||||||
|
final int token = compressed[off++] & 0xFF;
|
||||||
|
int literalLen = token >>> 4;
|
||||||
|
if (literalLen == 0x0F) {
|
||||||
|
while (compressed[off] == (byte) 0xFF) {
|
||||||
|
literalLen += 0xFF;
|
||||||
|
++off;
|
||||||
|
}
|
||||||
|
literalLen += compressed[off++] & 0xFF;
|
||||||
|
}
|
||||||
|
// skip literals
|
||||||
|
off += literalLen;
|
||||||
|
decompressedOff += literalLen;
|
||||||
|
|
||||||
|
// check that the stream ends with literals and that there are at least
|
||||||
|
// 5 of them
|
||||||
|
if (off == compressed.length) {
|
||||||
|
assertEquals(decompressed.length, decompressedOff);
|
||||||
|
assertTrue("lastLiterals=" + literalLen + ", bytes=" + decompressed.length,
|
||||||
|
literalLen >= LZ4.LAST_LITERALS || literalLen == decompressed.length);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
|
||||||
|
// check that match dec is not 0
|
||||||
|
assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);
|
||||||
|
|
||||||
|
int matchLen = token & 0x0F;
|
||||||
|
if (matchLen == 0x0F) {
|
||||||
|
while (compressed[off] == (byte) 0xFF) {
|
||||||
|
matchLen += 0xFF;
|
||||||
|
++off;
|
||||||
|
}
|
||||||
|
matchLen += compressed[off++] & 0xFF;
|
||||||
|
}
|
||||||
|
matchLen += LZ4.MIN_MATCH;
|
||||||
|
|
||||||
|
// if the match ends prematurely, the next sequence should not have
|
||||||
|
// literals or this means we are wasting space
|
||||||
|
if (decompressedOff + matchLen < decompressed.length - LZ4.LAST_LITERALS) {
|
||||||
|
final boolean moreCommonBytes = decompressed[decompressedOff + matchLen] == decompressed[decompressedOff - matchDec + matchLen];
|
||||||
|
final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
|
||||||
|
assertTrue(!moreCommonBytes || !nextSequenceHasLiterals);
|
||||||
|
}
|
||||||
|
|
||||||
|
decompressedOff += matchLen;
|
||||||
|
}
|
||||||
|
assertEquals(decompressed.length, decompressedOff);
|
||||||
|
return compressed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testShortLiteralsAndMatchs() throws IOException {
|
||||||
|
// literals and matchs lengths <= 15
|
||||||
|
final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8");
|
||||||
|
test(decompressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLongMatchs() throws IOException {
|
||||||
|
// match length >= 20
|
||||||
|
final byte[] decompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)];
|
||||||
|
for (int i = 0; i < decompressed.length; ++i) {
|
||||||
|
decompressed[i] = (byte) i;
|
||||||
|
}
|
||||||
|
test(decompressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLongLiterals() throws IOException {
|
||||||
|
// long literals (length >= 16) which are not the last literals
|
||||||
|
final byte[] decompressed = randomArray(RandomInts.randomIntBetween(random(), 400, 1024), 256);
|
||||||
|
final int matchRef = random().nextInt(30);
|
||||||
|
final int matchOff = RandomInts.randomIntBetween(random(), decompressed.length - 40, decompressed.length - 20);
|
||||||
|
final int matchLength = RandomInts.randomIntBetween(random(), 4, 10);
|
||||||
|
System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
|
||||||
|
test(decompressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.codecs.compressing;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TestFastCompressionMode extends AbstractTestCompressionMode {
|
public class TestFastCompressionMode extends AbstractTestLZ4CompressionMode {
|
||||||
|
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
|
@ -17,12 +17,23 @@ package org.apache.lucene.codecs.compressing;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public class TestFastDecompressionMode extends AbstractTestCompressionMode {
|
public class TestFastDecompressionMode extends AbstractTestLZ4CompressionMode {
|
||||||
|
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
mode = CompressionMode.FAST_DECOMPRESSION;
|
mode = CompressionMode.FAST_DECOMPRESSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] test(byte[] decompressed) throws IOException {
|
||||||
|
final byte[] compressed = super.test(decompressed);
|
||||||
|
final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
|
||||||
|
// because of the way this compression mode works, its output is necessarily
|
||||||
|
// smaller than the output of CompressionMode.FAST
|
||||||
|
assertTrue(compressed.length <= compressed2.length);
|
||||||
|
return compressed;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue