mirror of https://github.com/apache/lucene.git
LUCENE-10619: Optimize the writeBytes in TermsHashPerField (#966)
parent d6dbe4374a
commit d7c2def019
@@ -109,6 +109,9 @@ Improvements

 * LUCENE-10603: Update SortedSetDocValues iteration to use SortedSetDocValues#docValueCount().
   (Greg Miller, Stefan Vodita)

+* LUCENE-10619: Optimize the writeBytes in TermsHashPerField. (Tang Donghai)
+
 * GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller)

 Optimizations
@@ -230,9 +230,29 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
   }

   final void writeBytes(int stream, byte[] b, int offset, int len) {
-    // TODO: optimize
     final int end = offset + len;
-    for (int i = offset; i < end; i++) writeByte(stream, b[i]);
+    int streamAddress = streamAddressOffset + stream;
+    int upto = termStreamAddressBuffer[streamAddress];
+    byte[] slice = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT];
+    assert slice != null;
+    int sliceOffset = upto & ByteBlockPool.BYTE_BLOCK_MASK;
+
+    while (slice[sliceOffset] == 0 && offset < end) {
+      slice[sliceOffset++] = b[offset++];
+      (termStreamAddressBuffer[streamAddress])++;
+    }
+
+    while (offset < end) {
+      int offsetAndLength = bytePool.allocKnownSizeSlice(slice, sliceOffset);
+      sliceOffset = offsetAndLength >> 8;
+      int sliceLength = offsetAndLength & 0xff;
+      slice = bytePool.buffer;
+      int writeLength = Math.min(sliceLength - 1, end - offset);
+      System.arraycopy(b, offset, slice, sliceOffset, writeLength);
+      sliceOffset += writeLength;
+      offset += writeLength;
+      termStreamAddressBuffer[streamAddress] = sliceOffset + bytePool.byteOffset;
+    }
   }

   final void writeVInt(int stream, int i) {
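The rewrite works in two phases: it first tops up the slice the stream currently points at (unused bytes in a slice are zero while the slice's final byte carries a non-zero level marker, so slice[sliceOffset] == 0 means there is still room), then allocates follow-on slices and copies whole chunks with System.arraycopy instead of calling writeByte once per byte. Below is a minimal sketch of how a stream's global address is resolved to a pool buffer, assuming the public shift/mask constants behave as their names in ByteBlockPool suggest; the helper names are illustrative and not part of the patch.

    // Sketch only (not from the patch): resolving a pooled byte address.
    // Assumes org.apache.lucene.util.ByteBlockPool and its public fields/constants.
    static byte[] blockFor(ByteBlockPool pool, int globalAddress) {
      return pool.buffers[globalAddress >> ByteBlockPool.BYTE_BLOCK_SHIFT]; // high bits: which block
    }

    static int offsetInBlock(int globalAddress) {
      return globalAddress & ByteBlockPool.BYTE_BLOCK_MASK; // low bits: position inside that block
    }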
@@ -240,7 +240,15 @@ public final class ByteBlockPool implements Accountable {
    * pool.
    */
   public int allocSlice(final byte[] slice, final int upto) {
+    return allocKnownSizeSlice(slice, upto) >> 8;
+  }
+
+  /**
+   * Create a new byte slice with the given starting size and return the slice offset in the pool
+   * and length. The lower 8 bits of the returned int represent the length of the slice, and the
+   * upper 24 bits represent the offset.
+   */
+  public int allocKnownSizeSlice(final byte[] slice, final int upto) {
     final int level = slice[upto] & 15;
     final int newLevel = NEXT_LEVEL_ARRAY[level];
     final int newSize = LEVEL_SIZE_ARRAY[newLevel];
@@ -268,7 +276,7 @@ public final class ByteBlockPool implements Accountable {
     // Write new level:
     buffer[byteUpto - 1] = (byte) (16 | newLevel);

-    return newUpto + 3;
+    return ((newUpto + 3) << 8) | (newSize - 3);
   }

   /**
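As the new javadoc notes, allocKnownSizeSlice packs two values into its return: the offset of the fresh slice in the upper 24 bits and its length in the lower 8 bits (slice level sizes are small enough to fit in a byte). Here is a hedged sketch of how a caller unpacks and uses that value, along the lines of the new writeBytes; the helper name and signature are illustrative, not part of the patch.

    // Sketch only (not from the patch): consuming the packed offset/length.
    static int copyIntoFreshSlice(ByteBlockPool pool, byte[] slice, int upto,
                                  byte[] src, int srcOffset, int remaining) {
      int offsetAndLength = pool.allocKnownSizeSlice(slice, upto);
      int newSliceOffset = offsetAndLength >> 8;   // upper 24 bits: start of the new slice
      int newSliceLength = offsetAndLength & 0xff; // lower 8 bits: its total size
      // The last byte of every slice is reserved for the level marker, hence the -1.
      int writeLength = Math.min(newSliceLength - 1, remaining);
      System.arraycopy(src, srcOffset, pool.buffer, newSliceOffset, writeLength);
      return writeLength; // the caller advances srcOffset/upto by this amount
    }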
@@ -29,6 +29,7 @@ import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
@@ -298,4 +299,28 @@ public class TestTermsHashPerField extends LuceneTestCase {
       assertTrue("the last posting must be EOF on the reader", eof);
     }
   }
+
+  public void testWriteBytes() throws IOException {
+    for (int i = 0; i < 100; i++) {
+      AtomicInteger newCalled = new AtomicInteger(0);
+      AtomicInteger addCalled = new AtomicInteger(0);
+      TermsHashPerField hash = createNewHash(newCalled, addCalled);
+      hash.start(null, true);
+      hash.add(newBytesRef("start"), 0); // tid = 0;
+      int size = TestUtil.nextInt(random(), 50000, 100000);
+      byte[] randomData = new byte[size];
+      random().nextBytes(randomData);
+      int offset = 0;
+      while (offset < randomData.length) {
+        int writeLength = Math.min(randomData.length - offset, TestUtil.nextInt(random(), 1, 200));
+        hash.writeBytes(0, randomData, offset, writeLength);
+        offset += writeLength;
+      }
+      ByteSliceReader reader = new ByteSliceReader();
+      reader.init(hash.bytePool, 0, hash.bytePool.byteOffset + hash.bytePool.byteUpto);
+      for (byte expected : randomData) {
+        assertEquals(expected, reader.readByte());
+      }
+    }
+  }
 }
@@ -101,4 +101,38 @@ public class TestByteBlockPool extends LuceneTestCase {
       position += expected.length;
     }
   }
+
+  public void testAllocKnowSizeSlice() throws IOException {
+    Counter bytesUsed = Counter.newCounter();
+    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
+    pool.nextBuffer();
+    for (int i = 0; i < 100; i++) {
+      int size;
+      if (random().nextBoolean()) {
+        size = TestUtil.nextInt(random(), 100, 1000);
+      } else {
+        size = TestUtil.nextInt(random(), 50000, 100000);
+      }
+      byte[] randomData = new byte[size];
+      random().nextBytes(randomData);
+
+      int upto = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
+
+      for (int offset = 0; offset < size; ) {
+        if ((pool.buffer[upto] & 16) == 0) {
+          pool.buffer[upto++] = randomData[offset++];
+        } else {
+          int offsetAndLength = pool.allocKnownSizeSlice(pool.buffer, upto);
+          int sliceLength = offsetAndLength & 0xff;
+          upto = offsetAndLength >> 8;
+          assertNotEquals(0, pool.buffer[upto + sliceLength - 1]);
+          assertEquals(0, pool.buffer[upto]);
+          int writeLength = Math.min(sliceLength - 1, size - offset);
+          System.arraycopy(randomData, offset, pool.buffer, upto, writeLength);
+          offset += writeLength;
+          upto += writeLength;
+        }
+      }
+    }
+  }
 }