mirror of https://github.com/apache/lucene.git
Improve checksum calculations (#13989)
Take advantage of the existing buffer in BufferedChecksum to speed up reads for Longs, Ints, Shorts and Long arrays by avoiding byte-by-byte reads.
This commit is contained in:
parent
d9c3bc875b
commit
71715b59e8
|
@ -79,6 +79,8 @@ Optimizations
|
||||||
* GITHUB#13999: CombinedFieldQuery now returns non-infinite maximum scores,
|
* GITHUB#13999: CombinedFieldQuery now returns non-infinite maximum scores,
|
||||||
making it eligible to dynamic pruning. (Adrien Grand)
|
making it eligible to dynamic pruning. (Adrien Grand)
|
||||||
|
|
||||||
|
* GITHUB#13989: Faster checksum computation. (Jean-François Boeuf)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||||
|
|
|
@ -196,9 +196,7 @@ public class FuzzySet implements Accountable {
|
||||||
int bloomSize = in.readInt();
|
int bloomSize = in.readInt();
|
||||||
int numLongs = in.readInt();
|
int numLongs = in.readInt();
|
||||||
long[] longs = new long[numLongs];
|
long[] longs = new long[numLongs];
|
||||||
for (int i = 0; i < numLongs; i++) {
|
in.readLongs(longs, 0, numLongs);
|
||||||
longs[i] = in.readLong();
|
|
||||||
}
|
|
||||||
FixedBitSet bits = new FixedBitSet(longs, bloomSize + 1);
|
FixedBitSet bits = new FixedBitSet(longs, bloomSize + 1);
|
||||||
return new FuzzySet(bits, bloomSize, hashCount);
|
return new FuzzySet(bits, bloomSize, hashCount);
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,9 +101,7 @@ public final class Lucene90LiveDocsFormat extends LiveDocsFormat {
|
||||||
|
|
||||||
private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
|
private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
|
||||||
long[] data = new long[FixedBitSet.bits2words(length)];
|
long[] data = new long[FixedBitSet.bits2words(length)];
|
||||||
for (int i = 0; i < data.length; i++) {
|
input.readLongs(data, 0, data.length);
|
||||||
data[i] = input.readLong();
|
|
||||||
}
|
|
||||||
return new FixedBitSet(data, length);
|
return new FixedBitSet(data, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,11 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.store;
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.LongBuffer;
|
||||||
import java.util.zip.Checksum;
|
import java.util.zip.Checksum;
|
||||||
|
import org.apache.lucene.util.BitUtil;
|
||||||
|
|
||||||
/** Wraps another {@link Checksum} with an internal buffer to speed up checksum calculations. */
|
/** Wraps another {@link Checksum} with an internal buffer to speed up checksum calculations. */
|
||||||
public class BufferedChecksum implements Checksum {
|
public class BufferedChecksum implements Checksum {
|
||||||
|
@ -60,6 +64,45 @@ public class BufferedChecksum implements Checksum {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void updateShort(short val) {
|
||||||
|
if (upto + Short.BYTES > buffer.length) flush();
|
||||||
|
BitUtil.VH_LE_SHORT.set(buffer, upto, val);
|
||||||
|
upto += Short.BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateInt(int val) {
|
||||||
|
if (upto + Integer.BYTES > buffer.length) flush();
|
||||||
|
BitUtil.VH_LE_INT.set(buffer, upto, val);
|
||||||
|
upto += Integer.BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateLong(long val) {
|
||||||
|
if (upto + Long.BYTES > buffer.length) flush();
|
||||||
|
BitUtil.VH_LE_LONG.set(buffer, upto, val);
|
||||||
|
upto += Long.BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateLongs(long[] vals, int offset, int len) {
|
||||||
|
if (upto > 0) {
|
||||||
|
int remainingCapacityInLong = Math.min((buffer.length - upto) / Long.BYTES, len);
|
||||||
|
for (int i = 0; i < remainingCapacityInLong; i++, offset++, len--) {
|
||||||
|
updateLong(vals[offset]);
|
||||||
|
}
|
||||||
|
if (0 == len) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LongBuffer b = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
|
||||||
|
final int capacityInLong = buffer.length / Long.BYTES;
|
||||||
|
while (len > 0) {
|
||||||
|
flush();
|
||||||
|
int l = Math.min(capacityInLong, len);
|
||||||
|
b.put(0, vals, offset, l);
|
||||||
|
upto += l * Long.BYTES;
|
||||||
|
offset += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getValue() {
|
public long getValue() {
|
||||||
flush();
|
flush();
|
||||||
|
|
|
@ -18,14 +18,13 @@ package org.apache.lucene.store;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.zip.CRC32;
|
import java.util.zip.CRC32;
|
||||||
import java.util.zip.Checksum;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple implementation of {@link ChecksumIndexInput} that wraps another input and delegates calls.
|
* Simple implementation of {@link ChecksumIndexInput} that wraps another input and delegates calls.
|
||||||
*/
|
*/
|
||||||
public class BufferedChecksumIndexInput extends ChecksumIndexInput {
|
public class BufferedChecksumIndexInput extends ChecksumIndexInput {
|
||||||
final IndexInput main;
|
final IndexInput main;
|
||||||
final Checksum digest;
|
final BufferedChecksum digest;
|
||||||
|
|
||||||
/** Creates a new BufferedChecksumIndexInput */
|
/** Creates a new BufferedChecksumIndexInput */
|
||||||
public BufferedChecksumIndexInput(IndexInput main) {
|
public BufferedChecksumIndexInput(IndexInput main) {
|
||||||
|
@ -47,6 +46,33 @@ public class BufferedChecksumIndexInput extends ChecksumIndexInput {
|
||||||
digest.update(b, offset, len);
|
digest.update(b, offset, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public short readShort() throws IOException {
|
||||||
|
short v = main.readShort();
|
||||||
|
digest.updateShort(v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readInt() throws IOException {
|
||||||
|
int v = main.readInt();
|
||||||
|
digest.updateInt(v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long readLong() throws IOException {
|
||||||
|
long v = main.readLong();
|
||||||
|
digest.updateLong(v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readLongs(long[] dst, int offset, int length) throws IOException {
|
||||||
|
main.readLongs(dst, offset, length);
|
||||||
|
digest.updateLongs(dst, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getChecksum() {
|
public long getChecksum() {
|
||||||
return digest.getValue();
|
return digest.getValue();
|
||||||
|
|
|
@ -16,9 +16,13 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.store;
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.LongBuffer;
|
||||||
import java.util.zip.CRC32;
|
import java.util.zip.CRC32;
|
||||||
import java.util.zip.Checksum;
|
import java.util.zip.Checksum;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.BitUtil;
|
||||||
|
|
||||||
public class TestBufferedChecksum extends LuceneTestCase {
|
public class TestBufferedChecksum extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -63,4 +67,135 @@ public class TestBufferedChecksum extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
assertEquals(c1.getValue(), c2.getValue());
|
assertEquals(c1.getValue(), c2.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDifferentInputTypes() {
|
||||||
|
Checksum crc = new CRC32();
|
||||||
|
BufferedChecksum buffered = new BufferedChecksum(new CRC32());
|
||||||
|
int iterations = atLeast(1000);
|
||||||
|
for (int i = 0; i < iterations; i++) {
|
||||||
|
byte[] input = new byte[4096];
|
||||||
|
random().nextBytes(input);
|
||||||
|
crc.update(input);
|
||||||
|
final long checksum = crc.getValue();
|
||||||
|
crc.reset();
|
||||||
|
updateByShorts(checksum, buffered, input);
|
||||||
|
updateByInts(checksum, buffered, input);
|
||||||
|
updateByLongs(checksum, buffered, input);
|
||||||
|
updateByChunkOfBytes(checksum, buffered, input);
|
||||||
|
updateByChunkOfLongs(checksum, buffered, input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateByChunkOfBytes(long expected, BufferedChecksum checksum, byte[] input) {
|
||||||
|
for (int i = 0; i < input.length; i++) {
|
||||||
|
checksum.update(input[i]);
|
||||||
|
}
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
checksum.update(input);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
int iterations = atLeast(10);
|
||||||
|
for (int ite = 0; ite < iterations; ite++) {
|
||||||
|
int len0 = random().nextInt(input.length / 2);
|
||||||
|
checksum.update(input, 0, len0);
|
||||||
|
checksum.update(input, len0, input.length - len0);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
checksum.update(input, 0, len0);
|
||||||
|
int len1 = random().nextInt(input.length / 4);
|
||||||
|
for (int i = 0; i < len1; i++) {
|
||||||
|
checksum.update(input[len0 + i]);
|
||||||
|
}
|
||||||
|
checksum.update(input, len0 + len1, input.length - len1 - len0);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateByShorts(long expected, BufferedChecksum checksum, byte[] input) {
|
||||||
|
int ix = shiftArray(checksum, input);
|
||||||
|
while (ix <= input.length - Short.BYTES) {
|
||||||
|
checksum.updateShort((short) BitUtil.VH_LE_SHORT.get(input, ix));
|
||||||
|
ix += Short.BYTES;
|
||||||
|
}
|
||||||
|
checksum.update(input, ix, input.length - ix);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateByInts(long expected, BufferedChecksum checksum, byte[] input) {
|
||||||
|
int ix = shiftArray(checksum, input);
|
||||||
|
while (ix <= input.length - Integer.BYTES) {
|
||||||
|
checksum.updateInt((int) BitUtil.VH_LE_INT.get(input, ix));
|
||||||
|
ix += Integer.BYTES;
|
||||||
|
}
|
||||||
|
checksum.update(input, ix, input.length - ix);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateByLongs(long expected, BufferedChecksum checksum, byte[] input) {
|
||||||
|
int ix = shiftArray(checksum, input);
|
||||||
|
while (ix <= input.length - Long.BYTES) {
|
||||||
|
checksum.updateLong((long) BitUtil.VH_LE_LONG.get(input, ix));
|
||||||
|
ix += Long.BYTES;
|
||||||
|
}
|
||||||
|
checksum.update(input, ix, input.length - ix);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int shiftArray(BufferedChecksum checksum, byte[] input) {
|
||||||
|
int ix = random().nextInt(input.length / 4);
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
return ix;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateByChunkOfLongs(long expected, BufferedChecksum checksum, byte[] input) {
|
||||||
|
int ix = random().nextInt(input.length / 4);
|
||||||
|
int remaining = Long.BYTES - ix & 7;
|
||||||
|
LongBuffer b =
|
||||||
|
ByteBuffer.wrap(input).position(ix).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
|
||||||
|
long[] longInput = new long[(input.length - ix) / Long.BYTES];
|
||||||
|
b.get(longInput);
|
||||||
|
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
for (int i = 0; i < longInput.length; i++) {
|
||||||
|
checksum.updateLong(longInput[i]);
|
||||||
|
}
|
||||||
|
checksum.update(input, input.length - remaining, remaining);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
checksum.updateLongs(longInput, 0, longInput.length);
|
||||||
|
checksum.update(input, input.length - remaining, remaining);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
int iterations = atLeast(10);
|
||||||
|
for (int ite = 0; ite < iterations; ite++) {
|
||||||
|
int len0 = random().nextInt(longInput.length / 2);
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
checksum.updateLongs(longInput, 0, len0);
|
||||||
|
checksum.updateLongs(longInput, len0, longInput.length - len0);
|
||||||
|
checksum.update(input, input.length - remaining, remaining);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
checksum.updateLongs(longInput, 0, len0);
|
||||||
|
int len1 = random().nextInt(longInput.length / 4);
|
||||||
|
for (int i = 0; i < len1; i++) {
|
||||||
|
checksum.updateLong(longInput[len0 + i]);
|
||||||
|
}
|
||||||
|
checksum.updateLongs(longInput, len0 + len1, longInput.length - len1 - len0);
|
||||||
|
checksum.update(input, input.length - remaining, remaining);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
|
||||||
|
checksum.update(input, 0, ix);
|
||||||
|
checksum.updateLongs(longInput, 0, len0);
|
||||||
|
checksum.update(input, ix + len0 * Long.BYTES, input.length - len0 * Long.BYTES - ix);
|
||||||
|
checkChecksumValueAndReset(expected, checksum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkChecksumValueAndReset(long expected, Checksum checksum) {
|
||||||
|
assertEquals(expected, checksum.getValue());
|
||||||
|
checksum.reset();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue