mirror of https://github.com/apache/lucene.git
LUCENE-10619: Optimize the writeBytes in TermsHashPerField (#966)
parent d6dbe4374a
commit d7c2def019
lucene/CHANGES.txt:

@@ -109,6 +109,9 @@ Improvements
 * LUCENE-10603: Update SortedSetDocValues iteration to use SortedSetDocValues#docValueCount().
   (Greg Miller, Stefan Vodita)
 
+* LUCENE-10619: Optimize the writeBytes in TermsHashPerField. (Tang Donghai)
+
 * GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller)
 
 Optimizations
lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java:

@@ -230,9 +230,29 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
   }
 
   final void writeBytes(int stream, byte[] b, int offset, int len) {
-    // TODO: optimize
     final int end = offset + len;
-    for (int i = offset; i < end; i++) writeByte(stream, b[i]);
+    int streamAddress = streamAddressOffset + stream;
+    int upto = termStreamAddressBuffer[streamAddress];
+    byte[] slice = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT];
+    assert slice != null;
+    int sliceOffset = upto & ByteBlockPool.BYTE_BLOCK_MASK;
+
+    while (slice[sliceOffset] == 0 && offset < end) {
+      slice[sliceOffset++] = b[offset++];
+      (termStreamAddressBuffer[streamAddress])++;
+    }
+
+    while (offset < end) {
+      int offsetAndLength = bytePool.allocKnownSizeSlice(slice, sliceOffset);
+      sliceOffset = offsetAndLength >> 8;
+      int sliceLength = offsetAndLength & 0xff;
+      slice = bytePool.buffer;
+      int writeLength = Math.min(sliceLength - 1, end - offset);
+      System.arraycopy(b, offset, slice, sliceOffset, writeLength);
+      sliceOffset += writeLength;
+      offset += writeLength;
+      termStreamAddressBuffer[streamAddress] = sliceOffset + bytePool.byteOffset;
+    }
   }
 
   final void writeVInt(int stream, int i) {
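The rewritten writeBytes works in two phases: it first tops up the current slice one byte at a time (free bytes in a slice are zero; the only non-zero byte at an unwritten position is the slice's end marker), then repeatedly allocates the next, larger slice via allocKnownSizeSlice and bulk-copies with System.arraycopy, always leaving the slice's final marker byte untouched (hence sliceLength - 1). The toy sketch below mirrors that control flow against a flat buffer with fixed-size slices; the pool, allocator, and sizes here are invented for illustration and omit ByteBlockPool's forwarding addresses and level growth:

public class TwoPhaseWriteSketch {
  static final int SLICE_SIZE = 16;       // toy slice size, not a Lucene level size
  static byte[] pool = new byte[256];     // flat buffer standing in for the block pool
  static int poolUpto = 0;

  // Toy allocator: carve the next slice, mark its last byte non-zero (like 16 | level),
  // and pack (offset << 8) | length the way allocKnownSizeSlice does.
  static int allocKnownSizeSliceToy() {
    int start = poolUpto;
    poolUpto += SLICE_SIZE;
    pool[poolUpto - 1] = 16;
    return (start << 8) | SLICE_SIZE;
  }

  static void writeBytesToy(int upto, byte[] b, int offset, int len) {
    final int end = offset + len;
    // Phase 1: fill the free (zero) bytes left in the current slice, one at a time.
    while (offset < end && pool[upto] == 0) {
      pool[upto++] = b[offset++];
    }
    // Phase 2: allocate successive slices and copy a whole chunk into each.
    while (offset < end) {
      int packed = allocKnownSizeSliceToy();
      int sliceOffset = packed >> 8;
      int sliceLength = packed & 0xff;
      int writeLength = Math.min(sliceLength - 1, end - offset); // keep the marker byte
      System.arraycopy(b, offset, pool, sliceOffset, writeLength);
      offset += writeLength;
    }
  }

  public static void main(String[] args) {
    int upto = allocKnownSizeSliceToy() >> 8;  // start with one slice
    byte[] data = new byte[40];
    java.util.Arrays.fill(data, (byte) 7);
    writeBytesToy(upto, data, 0, data.length);
    System.out.println(poolUpto);              // 48: three 16-byte toy slices used
  }
}

The win over the removed loop is that each byte no longer pays its own slice-boundary check inside writeByte; whole runs are moved with one arraycopy per slice.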
lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java:

@@ -240,7 +240,15 @@ public final class ByteBlockPool implements Accountable {
    * pool.
    */
   public int allocSlice(final byte[] slice, final int upto) {
+    return allocKnownSizeSlice(slice, upto) >> 8;
+  }
+
+  /**
+   * Create a new byte slice with the given starting size and return the slice offset in the pool
+   * and its length. The lower 8 bits of the returned int represent the length of the slice, and
+   * the upper 24 bits represent the offset.
+   */
+  public int allocKnownSizeSlice(final byte[] slice, final int upto) {
     final int level = slice[upto] & 15;
     final int newLevel = NEXT_LEVEL_ARRAY[level];
     final int newSize = LEVEL_SIZE_ARRAY[newLevel];
@@ -268,7 +276,7 @@ public final class ByteBlockPool implements Accountable {
     // Write new level:
     buffer[byteUpto - 1] = (byte) (16 | newLevel);
 
-    return newUpto + 3;
+    return ((newUpto + 3) << 8) | (newSize - 3);
   }
 
   /**
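allocSlice used to return only the offset of the first writable byte in the new slice, so writeBytes could not know how many bytes fit before the next boundary. allocKnownSizeSlice packs both values into one int: the upper 24 bits carry the offset (newUpto + 3, skipping the three bytes displaced into the new slice by the forwarding address written over the end of the old one) and the lower 8 bits carry the usable length (newSize - 3). This fits because ByteBlockPool's level sizes stay well under 256 bytes. A small decode example; the offset and size values below are invented, only the bit layout follows the patch:

public class PackedSliceDemo {
  public static void main(String[] args) {
    int newUpto = 100;  // hypothetical start of the freshly allocated slice
    int newSize = 32;   // hypothetical total slice size for its level

    int packed = ((newUpto + 3) << 8) | (newSize - 3);  // what allocKnownSizeSlice returns

    int sliceOffset = packed >> 8;    // 103: first writable byte of the new slice
    int sliceLength = packed & 0xff;  // 29: usable span; writeBytes copies at most
                                      // sliceLength - 1 bytes to preserve the level marker

    System.out.println(sliceOffset + ", " + sliceLength);  // prints "103, 29"
  }
}

Existing callers are unaffected: allocSlice now simply delegates and shifts the packed value right by 8, recovering exactly the offset it returned before.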
lucene/core/src/test/org/apache/lucene/index/TestTermsHashPerField.java:

@@ -29,6 +29,7 @@ import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
@@ -298,4 +299,28 @@ public class TestTermsHashPerField extends LuceneTestCase {
       assertTrue("the last posting must be EOF on the reader", eof);
     }
   }
+
+  public void testWriteBytes() throws IOException {
+    for (int i = 0; i < 100; i++) {
+      AtomicInteger newCalled = new AtomicInteger(0);
+      AtomicInteger addCalled = new AtomicInteger(0);
+      TermsHashPerField hash = createNewHash(newCalled, addCalled);
+      hash.start(null, true);
+      hash.add(newBytesRef("start"), 0); // tid = 0;
+      int size = TestUtil.nextInt(random(), 50000, 100000);
+      byte[] randomData = new byte[size];
+      random().nextBytes(randomData);
+      int offset = 0;
+      while (offset < randomData.length) {
+        int writeLength = Math.min(randomData.length - offset, TestUtil.nextInt(random(), 1, 200));
+        hash.writeBytes(0, randomData, offset, writeLength);
+        offset += writeLength;
+      }
+      ByteSliceReader reader = new ByteSliceReader();
+      reader.init(hash.bytePool, 0, hash.bytePool.byteOffset + hash.bytePool.byteUpto);
+      for (byte expected : randomData) {
+        assertEquals(expected, reader.readByte());
+      }
+    }
+  }
 }
lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java:

@@ -101,4 +101,38 @@ public class TestByteBlockPool extends LuceneTestCase {
       position += expected.length;
     }
   }
+
+  public void testAllocKnowSizeSlice() throws IOException {
+    Counter bytesUsed = Counter.newCounter();
+    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
+    pool.nextBuffer();
+    for (int i = 0; i < 100; i++) {
+      int size;
+      if (random().nextBoolean()) {
+        size = TestUtil.nextInt(random(), 100, 1000);
+      } else {
+        size = TestUtil.nextInt(random(), 50000, 100000);
+      }
+      byte[] randomData = new byte[size];
+      random().nextBytes(randomData);
+
+      int upto = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+
+      for (int offset = 0; offset < size; ) {
+        if ((pool.buffer[upto] & 16) == 0) {
+          pool.buffer[upto++] = randomData[offset++];
+        } else {
+          int offsetAndLength = pool.allocKnownSizeSlice(pool.buffer, upto);
+          int sliceLength = offsetAndLength & 0xff;
+          upto = offsetAndLength >> 8;
+          assertNotEquals(0, pool.buffer[upto + sliceLength - 1]);
+          assertEquals(0, pool.buffer[upto]);
+          int writeLength = Math.min(sliceLength - 1, size - offset);
+          System.arraycopy(randomData, offset, pool.buffer, upto, writeLength);
+          offset += writeLength;
+          upto += writeLength;
+        }
+      }
+    }
+  }
 }
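Both new tests lean on the same slice invariant: fresh slice bytes are zero (writable), and the slice ends with a non-zero marker byte of the form 16 | level, so probing (pool.buffer[upto] & 16) == 0 before each write, as testAllocKnowSizeSlice does, detects the boundary. (writeBytes itself probes slice[sliceOffset] == 0, which is equivalent at unwritten positions, since the marker is the only non-zero byte there.) A minimal illustration; the 14-byte slice and level value are invented stand-ins for ByteBlockPool's level table:

public class SliceMarkerDemo {
  public static void main(String[] args) {
    byte[] slice = new byte[14];                   // fresh slice: all zeros = writable
    int level = 1;
    slice[slice.length - 1] = (byte) (16 | level); // end marker written at allocation time

    int upto = 0;
    while ((slice[upto] & 16) == 0) {              // same probe as the test
      slice[upto++] = 42;                          // write until the marker stops us
    }
    System.out.println(upto + " " + (slice[upto] & 15)); // "13 1": marker position, level
  }
}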