LUCENE-10619: Optimize the writeBytes in TermsHashPerField (#966)

This commit is contained in:
tang donghai 2022-07-12 23:14:37 +08:00 committed by GitHub
parent d6dbe4374a
commit d7c2def019
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 3 deletions

View File

@@ -109,6 +109,9 @@ Improvements
* LUCENE-10603: Update SortedSetDocValues iteration to use SortedSetDocValues#docValueCount().
(Greg Miller, Stefan Vodita)
* LUCENE-10619: Optimize the writeBytes in TermsHashPerField. (Tang Donghai)
* GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller)
Optimizations

View File

@@ -230,9 +230,29 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
}
/**
 * Writes {@code len} bytes from {@code b}, starting at {@code offset}, into the given stream of
 * the current term.
 *
 * <p>Optimized path: instead of delegating to {@link #writeByte} once per byte, this bulk-copies
 * with {@link System#arraycopy} and grows the slice chain via
 * {@code ByteBlockPool#allocKnownSizeSlice}, whose return value packs the new slice's start
 * offset in the upper 24 bits and its length in the lower 8 bits.
 *
 * <p>NOTE: the stale pre-optimization code (the {@code // TODO: optimize} byte-by-byte loop) was
 * removed here; left in place it would have written every byte twice, since it advanced the
 * stream's write position without advancing {@code offset}.
 */
final void writeBytes(int stream, byte[] b, int offset, int len) {
  final int end = offset + len;
  int streamAddress = streamAddressOffset + stream;
  int upto = termStreamAddressBuffer[streamAddress];
  byte[] slice = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT];
  assert slice != null;
  int sliceOffset = upto & ByteBlockPool.BYTE_BLOCK_MASK;

  // First fill whatever room is left in the current slice: bytes are writable (zero) until we
  // hit the non-zero level marker that terminates the slice.
  while (slice[sliceOffset] == 0 && offset < end) {
    slice[sliceOffset++] = b[offset++];
    (termStreamAddressBuffer[streamAddress])++;
  }

  // Bytes remain: allocate fresh (larger) slices and bulk-copy into each until done.
  while (offset < end) {
    int offsetAndLength = bytePool.allocKnownSizeSlice(slice, sliceOffset);
    sliceOffset = offsetAndLength >> 8;
    int sliceLength = offsetAndLength & 0xff;
    slice = bytePool.buffer;
    // The slice's last byte is reserved for the level marker, hence sliceLength - 1.
    int writeLength = Math.min(sliceLength - 1, end - offset);
    System.arraycopy(b, offset, slice, sliceOffset, writeLength);
    sliceOffset += writeLength;
    offset += writeLength;
    termStreamAddressBuffer[streamAddress] = sliceOffset + bytePool.byteOffset;
  }
}
final void writeVInt(int stream, int i) {

View File

@@ -240,7 +240,15 @@ public final class ByteBlockPool implements Accountable {
* pool.
*/
// Legacy entry point kept for existing callers: delegates to allocKnownSizeSlice and keeps
// only the upper 24 bits (the new slice's offset in the pool), discarding the slice-length
// byte encoded in the low 8 bits.
public int allocSlice(final byte[] slice, final int upto) {
return allocKnownSizeSlice(slice, upto) >> 8;
}
/**
* Create a new byte slice with the given starting size return the slice offset in the pool and
* length. The lower 8 bits of the returned int represent the length of the slice, and the upper
* 24 bits represent the offset.
*/
public int allocKnownSizeSlice(final byte[] slice, final int upto) {
final int level = slice[upto] & 15;
final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
@@ -268,7 +276,7 @@ public final class ByteBlockPool implements Accountable {
// Write new level:
buffer[byteUpto - 1] = (byte) (16 | newLevel);
return newUpto + 3;
return ((newUpto + 3) << 8) | (newSize - 3);
}
/**

View File

@@ -29,6 +29,7 @@ import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
@@ -298,4 +299,28 @@ public class TestTermsHashPerField extends LuceneTestCase {
assertTrue("the last posting must be EOF on the reader", eof);
}
}
/**
 * Feeds a large random payload into stream 0 in random-sized chunks via {@code writeBytes},
 * then reads everything back through a {@link ByteSliceReader} and verifies the bytes
 * round-trip unchanged across slice boundaries.
 */
public void testWriteBytes() throws IOException {
  for (int iter = 0; iter < 100; iter++) {
    AtomicInteger newCalled = new AtomicInteger(0);
    AtomicInteger addCalled = new AtomicInteger(0);
    TermsHashPerField hash = createNewHash(newCalled, addCalled);
    hash.start(null, true);
    hash.add(newBytesRef("start"), 0); // tid = 0;
    int size = TestUtil.nextInt(random(), 50000, 100000);
    byte[] data = new byte[size];
    random().nextBytes(data);
    // Write in random chunks of 1..200 bytes so many writes straddle slice boundaries.
    for (int written = 0; written < data.length; ) {
      int chunk = Math.min(data.length - written, TestUtil.nextInt(random(), 1, 200));
      hash.writeBytes(0, data, written, chunk);
      written += chunk;
    }
    ByteSliceReader reader = new ByteSliceReader();
    reader.init(hash.bytePool, 0, hash.bytePool.byteOffset + hash.bytePool.byteUpto);
    for (byte expected : data) {
      assertEquals(expected, reader.readByte());
    }
  }
}
}

View File

@@ -101,4 +101,38 @@ public class TestByteBlockPool extends LuceneTestCase {
position += expected.length;
}
}
/**
 * Exercises {@code allocKnownSizeSlice} directly: writes random data byte-by-byte until the
 * slice's level marker is reached, then allocates the next slice from the packed
 * offset/length return value and bulk-copies into it, asserting the new slice's layout
 * (zeroed body, non-zero terminating marker).
 */
public void testAllocKnowSizeSlice() throws IOException {
  Counter bytesUsed = Counter.newCounter();
  ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  pool.nextBuffer();
  for (int iter = 0; iter < 100; iter++) {
    final int dataLen =
        random().nextBoolean()
            ? TestUtil.nextInt(random(), 100, 1000)
            : TestUtil.nextInt(random(), 50000, 100000);
    byte[] data = new byte[dataLen];
    random().nextBytes(data);
    int upto = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
    int written = 0;
    while (written < dataLen) {
      if ((pool.buffer[upto] & 16) != 0) {
        // Hit the level marker terminating this slice: grow into a new, larger slice.
        int offsetAndLength = pool.allocKnownSizeSlice(pool.buffer, upto);
        int sliceLength = offsetAndLength & 0xff;
        upto = offsetAndLength >> 8;
        // New slice ends with a non-zero marker and starts zeroed (writable).
        assertNotEquals(0, pool.buffer[upto + sliceLength - 1]);
        assertEquals(0, pool.buffer[upto]);
        // Last byte is reserved for the marker, hence sliceLength - 1 usable bytes.
        int writeLength = Math.min(sliceLength - 1, dataLen - written);
        System.arraycopy(data, written, pool.buffer, upto, writeLength);
        written += writeLength;
        upto += writeLength;
      } else {
        pool.buffer[upto++] = data[written++];
      }
    }
  }
}
}