mirror of https://github.com/apache/lucene.git
Improve checksum calculations (#13989)
Take advantage of the existing buffer in BufferedChecksum to speed up reads for Longs, Ints, Shorts and Long arrays by avoiding byte-by-byte reads.
This commit is contained in:
parent
d9c3bc875b
commit
71715b59e8
|
@ -79,6 +79,8 @@ Optimizations
|
|||
* GITHUB#13999: CombinedFieldQuery now returns non-infinite maximum scores,
|
||||
making it eligible for dynamic pruning. (Adrien Grand)
|
||||
|
||||
* GITHUB#13989: Faster checksum computation. (Jean-François Boeuf)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||
|
|
|
@ -196,9 +196,7 @@ public class FuzzySet implements Accountable {
|
|||
int bloomSize = in.readInt();
|
||||
int numLongs = in.readInt();
|
||||
long[] longs = new long[numLongs];
|
||||
for (int i = 0; i < numLongs; i++) {
|
||||
longs[i] = in.readLong();
|
||||
}
|
||||
in.readLongs(longs, 0, numLongs);
|
||||
FixedBitSet bits = new FixedBitSet(longs, bloomSize + 1);
|
||||
return new FuzzySet(bits, bloomSize, hashCount);
|
||||
}
|
||||
|
|
|
@ -101,9 +101,7 @@ public final class Lucene90LiveDocsFormat extends LiveDocsFormat {
|
|||
|
||||
private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
|
||||
long[] data = new long[FixedBitSet.bits2words(length)];
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] = input.readLong();
|
||||
}
|
||||
input.readLongs(data, 0, data.length);
|
||||
return new FixedBitSet(data, length);
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,11 @@
|
|||
*/
|
||||
package org.apache.lucene.store;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.LongBuffer;
|
||||
import java.util.zip.Checksum;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
|
||||
/** Wraps another {@link Checksum} with an internal buffer to speed up checksum calculations. */
|
||||
public class BufferedChecksum implements Checksum {
|
||||
|
@ -60,6 +64,45 @@ public class BufferedChecksum implements Checksum {
|
|||
}
|
||||
}
|
||||
|
||||
void updateShort(short val) {
|
||||
if (upto + Short.BYTES > buffer.length) flush();
|
||||
BitUtil.VH_LE_SHORT.set(buffer, upto, val);
|
||||
upto += Short.BYTES;
|
||||
}
|
||||
|
||||
void updateInt(int val) {
|
||||
if (upto + Integer.BYTES > buffer.length) flush();
|
||||
BitUtil.VH_LE_INT.set(buffer, upto, val);
|
||||
upto += Integer.BYTES;
|
||||
}
|
||||
|
||||
void updateLong(long val) {
|
||||
if (upto + Long.BYTES > buffer.length) flush();
|
||||
BitUtil.VH_LE_LONG.set(buffer, upto, val);
|
||||
upto += Long.BYTES;
|
||||
}
|
||||
|
||||
void updateLongs(long[] vals, int offset, int len) {
|
||||
if (upto > 0) {
|
||||
int remainingCapacityInLong = Math.min((buffer.length - upto) / Long.BYTES, len);
|
||||
for (int i = 0; i < remainingCapacityInLong; i++, offset++, len--) {
|
||||
updateLong(vals[offset]);
|
||||
}
|
||||
if (0 == len) return;
|
||||
}
|
||||
|
||||
LongBuffer b = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
|
||||
final int capacityInLong = buffer.length / Long.BYTES;
|
||||
while (len > 0) {
|
||||
flush();
|
||||
int l = Math.min(capacityInLong, len);
|
||||
b.put(0, vals, offset, l);
|
||||
upto += l * Long.BYTES;
|
||||
offset += l;
|
||||
len -= l;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValue() {
|
||||
flush();
|
||||
|
|
|
@ -18,14 +18,13 @@ package org.apache.lucene.store;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
/**
|
||||
* Simple implementation of {@link ChecksumIndexInput} that wraps another input and delegates calls.
|
||||
*/
|
||||
public class BufferedChecksumIndexInput extends ChecksumIndexInput {
|
||||
final IndexInput main;
|
||||
final Checksum digest;
|
||||
final BufferedChecksum digest;
|
||||
|
||||
/** Creates a new BufferedChecksumIndexInput */
|
||||
public BufferedChecksumIndexInput(IndexInput main) {
|
||||
|
@ -47,6 +46,33 @@ public class BufferedChecksumIndexInput extends ChecksumIndexInput {
|
|||
digest.update(b, offset, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public short readShort() throws IOException {
|
||||
short v = main.readShort();
|
||||
digest.updateShort(v);
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readInt() throws IOException {
|
||||
int v = main.readInt();
|
||||
digest.updateInt(v);
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long readLong() throws IOException {
|
||||
long v = main.readLong();
|
||||
digest.updateLong(v);
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readLongs(long[] dst, int offset, int length) throws IOException {
|
||||
main.readLongs(dst, offset, length);
|
||||
digest.updateLongs(dst, offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getChecksum() {
|
||||
return digest.getValue();
|
||||
|
|
|
@ -16,9 +16,13 @@
|
|||
*/
|
||||
package org.apache.lucene.store;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.LongBuffer;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Checksum;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
|
||||
public class TestBufferedChecksum extends LuceneTestCase {
|
||||
|
||||
|
@ -63,4 +67,135 @@ public class TestBufferedChecksum extends LuceneTestCase {
|
|||
}
|
||||
assertEquals(c1.getValue(), c2.getValue());
|
||||
}
|
||||
|
||||
public void testDifferentInputTypes() {
|
||||
Checksum crc = new CRC32();
|
||||
BufferedChecksum buffered = new BufferedChecksum(new CRC32());
|
||||
int iterations = atLeast(1000);
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
byte[] input = new byte[4096];
|
||||
random().nextBytes(input);
|
||||
crc.update(input);
|
||||
final long checksum = crc.getValue();
|
||||
crc.reset();
|
||||
updateByShorts(checksum, buffered, input);
|
||||
updateByInts(checksum, buffered, input);
|
||||
updateByLongs(checksum, buffered, input);
|
||||
updateByChunkOfBytes(checksum, buffered, input);
|
||||
updateByChunkOfLongs(checksum, buffered, input);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateByChunkOfBytes(long expected, BufferedChecksum checksum, byte[] input) {
|
||||
for (int i = 0; i < input.length; i++) {
|
||||
checksum.update(input[i]);
|
||||
}
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
checksum.update(input);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
int iterations = atLeast(10);
|
||||
for (int ite = 0; ite < iterations; ite++) {
|
||||
int len0 = random().nextInt(input.length / 2);
|
||||
checksum.update(input, 0, len0);
|
||||
checksum.update(input, len0, input.length - len0);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
checksum.update(input, 0, len0);
|
||||
int len1 = random().nextInt(input.length / 4);
|
||||
for (int i = 0; i < len1; i++) {
|
||||
checksum.update(input[len0 + i]);
|
||||
}
|
||||
checksum.update(input, len0 + len1, input.length - len1 - len0);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateByShorts(long expected, BufferedChecksum checksum, byte[] input) {
|
||||
int ix = shiftArray(checksum, input);
|
||||
while (ix <= input.length - Short.BYTES) {
|
||||
checksum.updateShort((short) BitUtil.VH_LE_SHORT.get(input, ix));
|
||||
ix += Short.BYTES;
|
||||
}
|
||||
checksum.update(input, ix, input.length - ix);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
}
|
||||
|
||||
private void updateByInts(long expected, BufferedChecksum checksum, byte[] input) {
|
||||
int ix = shiftArray(checksum, input);
|
||||
while (ix <= input.length - Integer.BYTES) {
|
||||
checksum.updateInt((int) BitUtil.VH_LE_INT.get(input, ix));
|
||||
ix += Integer.BYTES;
|
||||
}
|
||||
checksum.update(input, ix, input.length - ix);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
}
|
||||
|
||||
private void updateByLongs(long expected, BufferedChecksum checksum, byte[] input) {
|
||||
int ix = shiftArray(checksum, input);
|
||||
while (ix <= input.length - Long.BYTES) {
|
||||
checksum.updateLong((long) BitUtil.VH_LE_LONG.get(input, ix));
|
||||
ix += Long.BYTES;
|
||||
}
|
||||
checksum.update(input, ix, input.length - ix);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
}
|
||||
|
||||
private static int shiftArray(BufferedChecksum checksum, byte[] input) {
|
||||
int ix = random().nextInt(input.length / 4);
|
||||
checksum.update(input, 0, ix);
|
||||
return ix;
|
||||
}
|
||||
|
||||
private void updateByChunkOfLongs(long expected, BufferedChecksum checksum, byte[] input) {
|
||||
int ix = random().nextInt(input.length / 4);
|
||||
int remaining = Long.BYTES - ix & 7;
|
||||
LongBuffer b =
|
||||
ByteBuffer.wrap(input).position(ix).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
|
||||
long[] longInput = new long[(input.length - ix) / Long.BYTES];
|
||||
b.get(longInput);
|
||||
|
||||
checksum.update(input, 0, ix);
|
||||
for (int i = 0; i < longInput.length; i++) {
|
||||
checksum.updateLong(longInput[i]);
|
||||
}
|
||||
checksum.update(input, input.length - remaining, remaining);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
checksum.update(input, 0, ix);
|
||||
checksum.updateLongs(longInput, 0, longInput.length);
|
||||
checksum.update(input, input.length - remaining, remaining);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
int iterations = atLeast(10);
|
||||
for (int ite = 0; ite < iterations; ite++) {
|
||||
int len0 = random().nextInt(longInput.length / 2);
|
||||
checksum.update(input, 0, ix);
|
||||
checksum.updateLongs(longInput, 0, len0);
|
||||
checksum.updateLongs(longInput, len0, longInput.length - len0);
|
||||
checksum.update(input, input.length - remaining, remaining);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
checksum.update(input, 0, ix);
|
||||
checksum.updateLongs(longInput, 0, len0);
|
||||
int len1 = random().nextInt(longInput.length / 4);
|
||||
for (int i = 0; i < len1; i++) {
|
||||
checksum.updateLong(longInput[len0 + i]);
|
||||
}
|
||||
checksum.updateLongs(longInput, len0 + len1, longInput.length - len1 - len0);
|
||||
checksum.update(input, input.length - remaining, remaining);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
|
||||
checksum.update(input, 0, ix);
|
||||
checksum.updateLongs(longInput, 0, len0);
|
||||
checksum.update(input, ix + len0 * Long.BYTES, input.length - len0 * Long.BYTES - ix);
|
||||
checkChecksumValueAndReset(expected, checksum);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkChecksumValueAndReset(long expected, Checksum checksum) {
|
||||
assertEquals(expected, checksum.getValue());
|
||||
checksum.reset();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue