mirror of https://github.com/apache/lucene.git
Copy directly between 2 ByteBlockPool to avoid double-copy (#12786)
* Copy directly between 2 ByteBlockPool to avoid double-copy
* add a comment on the start address
* Add unit test for NodeHash
* tidy code
This commit is contained in:
parent fcf6878144
commit 1999353d9e
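Why this matters: before this change, promoting a node from the fallback table to the
primary table copied its bytes twice -- first materialized into a temporary byte[] via
getBytes(), then appended into the primary pool. The new API copies block-to-block
between the two pools in one pass. A sketch of the call pattern, assembled from the
hunks below (not runnable on its own):

    // before: two copies per promoted node
    primaryTable.setNode(
        hashSlot,
        nodeAddress,
        fallbackTable.getBytes(lastFallbackHashSlot, lastFallbackNodeLength));

    // after: address bookkeeping plus a single direct pool-to-pool copy
    primaryTable.setNodeAddress(hashSlot, nodeAddress);
    primaryTable.copyFallbackNodeBytes(
        hashSlot, fallbackTable, lastFallbackHashSlot, lastFallbackNodeLength);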
@@ -228,6 +228,46 @@ public final class ByteBlockPool implements Accountable {
     append(bytes.bytes, bytes.offset, bytes.length);
   }

+  /**
+   * Append the bytes from a source {@link ByteBlockPool} at a given offset and length
+   *
+   * @param srcPool the source pool to copy from
+   * @param srcOffset the source pool offset
+   * @param length the number of bytes to copy
+   */
+  public void append(ByteBlockPool srcPool, long srcOffset, int length) {
+    int bytesLeft = length;
+    while (bytesLeft > 0) {
+      int bufferLeft = BYTE_BLOCK_SIZE - byteUpto;
+      if (bytesLeft < bufferLeft) { // fits within current buffer
+        appendBytesSingleBuffer(srcPool, srcOffset, bytesLeft);
+        break;
+      } else { // fill up this buffer and move to next one
+        if (bufferLeft > 0) {
+          appendBytesSingleBuffer(srcPool, srcOffset, bufferLeft);
+          bytesLeft -= bufferLeft;
+          srcOffset += bufferLeft;
+        }
+        nextBuffer();
+      }
+    }
+  }
+
+  // copy from the source pool until no bytes are left. length must fit within the current head buffer
+  private void appendBytesSingleBuffer(ByteBlockPool srcPool, long srcOffset, int length) {
+    assert length <= BYTE_BLOCK_SIZE - byteUpto;
+    // loop because the bytes to copy might span multiple byte[] buffers in srcPool
+    while (length > 0) {
+      byte[] srcBytes = srcPool.buffers[Math.toIntExact(srcOffset >> BYTE_BLOCK_SHIFT)];
+      int srcPos = Math.toIntExact(srcOffset & BYTE_BLOCK_MASK);
+      int bytesToCopy = Math.min(length, BYTE_BLOCK_SIZE - srcPos);
+      System.arraycopy(srcBytes, srcPos, buffer, byteUpto, bytesToCopy);
+      length -= bytesToCopy;
+      srcOffset += bytesToCopy;
+      byteUpto += bytesToCopy;
+    }
+  }
+
   /**
    * Append the provided byte array at the current position.
    *
@@ -277,6 +317,7 @@ public final class ByteBlockPool implements Accountable {
     int pos = (int) (offset & BYTE_BLOCK_MASK);
     while (bytesLeft > 0) {
       byte[] buffer = buffers[bufferIndex++];
+      assert buffer != null;
       int chunk = Math.min(bytesLeft, BYTE_BLOCK_SIZE - pos);
       System.arraycopy(buffer, pos, bytes, bytesOffset, chunk);
       bytesOffset += chunk;
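Usage note: the new ByteBlockPool.append(ByteBlockPool, long, int) overload reads
straight out of the source pool's buffers (srcOffset >> BYTE_BLOCK_SHIFT selects the
block, srcOffset & BYTE_BLOCK_MASK the position within it), so no intermediate byte[]
is allocated. A minimal sketch mirroring the unit test added below; the demo class
name is hypothetical:

    import org.apache.lucene.util.ByteBlockPool;

    public class ByteBlockPoolAppendDemo { // hypothetical demo class
      public static void main(String[] args) {
        // source pool holding a few bytes
        ByteBlockPool src = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
        src.append(new byte[] {1, 2, 3, 4, 5});

        // destination pool: copy 3 bytes starting at offset 1, pool to pool
        ByteBlockPool dst = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
        dst.append(src, 1, 3);

        byte[] out = new byte[3];
        dst.readBytes(0, out, 0, out.length);
        System.out.println(java.util.Arrays.toString(out)); // [2, 3, 4]
      }
    }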
@@ -123,11 +123,9 @@ final class NodeHash<T> {
       assert lastFallbackHashSlot != -1 && lastFallbackNodeLength != -1;

       // it was already in fallback -- promote to primary
-      // TODO: Copy directly between 2 ByteBlockPool to avoid double-copy
-      primaryTable.setNode(
-          hashSlot,
-          nodeAddress,
-          fallbackTable.getBytes(lastFallbackHashSlot, lastFallbackNodeLength));
+      primaryTable.setNodeAddress(hashSlot, nodeAddress);
+      primaryTable.copyFallbackNodeBytes(
+          hashSlot, fallbackTable, lastFallbackHashSlot, lastFallbackNodeLength);
     } else {
       // not in fallback either -- freeze & add the incoming node

@@ -142,7 +140,8 @@ final class NodeHash<T> {
         byte[] buf = new byte[Math.toIntExact(nodeAddress - startAddress + 1)];
         fstCompiler.bytes.copyBytes(startAddress, buf, 0, buf.length);

-        primaryTable.setNode(hashSlot, nodeAddress, buf);
+        primaryTable.setNodeAddress(hashSlot, nodeAddress);
+        primaryTable.copyNodeBytes(hashSlot, buf);

         // confirm frozen hash and unfrozen hash are the same
         assert primaryTable.hash(nodeAddress, hashSlot) == hash

@@ -221,7 +220,7 @@ final class NodeHash<T> {
   }

   /** Inner class because it needs access to hash function and FST bytes. */
-  private class PagedGrowableHash {
+  class PagedGrowableHash {
     // storing the FST node address where the position is the masked hash of the node arcs
     private PagedGrowableWriter fstNodeAddress;
     // storing the local copiedNodes address in the same position as fstNodeAddress

@@ -289,21 +288,38 @@ final class NodeHash<T> {
     }

     /**
-     * Set the node address and bytes from the provided hash slot
+     * Set the node address from the provided hash slot
      *
      * @param hashSlot the hash slot to write to
      * @param nodeAddress the node address
-     * @param bytes the node bytes to be copied
      */
-    public void setNode(long hashSlot, long nodeAddress, byte[] bytes) {
+    public void setNodeAddress(long hashSlot, long nodeAddress) {
       assert fstNodeAddress.get(hashSlot) == 0;
       fstNodeAddress.set(hashSlot, nodeAddress);
       count++;
+    }
+
+    /** copy the node bytes from the FST */
+    void copyNodeBytes(long hashSlot, byte[] bytes) {
+      assert copiedNodeAddress.get(hashSlot) == 0;
       copiedNodes.append(bytes);
       // write the offset, which points to the last byte of the node we copied since we later read
       // this node in reverse
+      copiedNodeAddress.set(hashSlot, copiedNodes.getPosition() - 1);
+    }
+
+    /** promote the node bytes from the fallback table */
+    void copyFallbackNodeBytes(
+        long hashSlot, PagedGrowableHash fallbackTable, long fallbackHashSlot, int nodeLength) {
       assert copiedNodeAddress.get(hashSlot) == 0;
+      long fallbackAddress = fallbackTable.copiedNodeAddress.get(fallbackHashSlot);
+      // fallbackAddress is the last offset of the node, but we need to copy the bytes from the
+      // start address
+      long fallbackStartAddress = fallbackAddress - nodeLength + 1;
+      assert fallbackStartAddress >= 0;
+      copiedNodes.append(fallbackTable.copiedNodes, fallbackStartAddress, nodeLength);
+      // write the offset, which points to the last byte of the node we copied since we later read
+      // this node in reverse
       copiedNodeAddress.set(hashSlot, copiedNodes.getPosition() - 1);
     }
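A worked example of the start-address arithmetic in copyFallbackNodeBytes, with
hypothetical numbers (copiedNodeAddress stores the offset of the last byte of each
node, because nodes are later read in reverse):

    // Suppose a 5-byte node was appended when the fallback pool already held
    // 100 bytes: its bytes occupy offsets 100..104 and getPosition() returns
    // 105, so the slot stores fallbackAddress = 105 - 1 = 104.
    long fallbackAddress = 104;
    int nodeLength = 5;
    // step back to the first byte of the node before copying forward
    long fallbackStartAddress = fallbackAddress - nodeLength + 1; // 104 - 5 + 1 == 100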
@@ -16,16 +16,46 @@
  */
 package org.apache.lucene.util;

-import java.io.IOException;
+import com.carrotsearch.randomizedtesting.generators.RandomBytes;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Random;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;

 public class TestByteBlockPool extends LuceneTestCase {

-  public void testReadAndWrite() throws IOException {
+  public void testAppendFromOtherPool() {
+    Random random = random();
+
+    ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
+    final int numBytes = atLeast(2 << 16);
+    byte[] bytes = RandomBytes.randomBytesOfLength(random, numBytes);
+    pool.append(bytes);
+
+    ByteBlockPool anotherPool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
+    byte[] existingBytes = new byte[atLeast(500)];
+    anotherPool.append(existingBytes);
+
+    // now slice and append to another pool
+    int offset = TestUtil.nextInt(random, 1, 2 << 15);
+    int length = bytes.length - offset;
+    if (random.nextBoolean()) {
+      length = TestUtil.nextInt(random, 1, length);
+    }
+    anotherPool.append(pool, offset, length);
+
+    assertEquals(existingBytes.length + length, anotherPool.getPosition());
+
+    byte[] results = new byte[length];
+    anotherPool.readBytes(existingBytes.length, results, 0, results.length);
+    for (int i = 0; i < length; i++) {
+      assertEquals("byte @ index=" + i, bytes[offset + i], results[i]);
+    }
+  }
+
+  public void testReadAndWrite() {
     Counter bytesUsed = Counter.newCounter();
     ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
     pool.nextBuffer();

@@ -74,7 +104,7 @@ public class TestByteBlockPool extends LuceneTestCase {
     }
   }

-  public void testLargeRandomBlocks() throws IOException {
+  public void testLargeRandomBlocks() {
     Counter bytesUsed = Counter.newCounter();
     ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
     pool.nextBuffer();

@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.fst;
+
+import com.carrotsearch.randomizedtesting.generators.RandomBytes;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+public class TestNodeHash extends LuceneTestCase {
+
+  public void testCopyFallbackNodeBytes() {
+    // we don't need the FSTCompiler in this test
+    NodeHash<Object> nodeHash = new NodeHash<>(null, 1);
+
+    NodeHash<Object>.PagedGrowableHash primaryHashTable = nodeHash.new PagedGrowableHash();
+    NodeHash<Object>.PagedGrowableHash fallbackHashTable = nodeHash.new PagedGrowableHash();
+    int nodeLength = atLeast(500);
+    long fallbackHashSlot = 1;
+    byte[] fallbackBytes = RandomBytes.randomBytesOfLength(random(), nodeLength);
+    fallbackHashTable.copyNodeBytes(fallbackHashSlot, fallbackBytes);
+
+    // check that the bytes we wrote are the same as the original bytes
+    byte[] storedBytes = fallbackHashTable.getBytes(fallbackHashSlot, nodeLength);
+    for (int i = 0; i < nodeLength; i++) {
+      assertEquals("byte @ index=" + i, fallbackBytes[i], storedBytes[i]);
+    }
+
+    long primaryHashSlot = 2;
+    primaryHashTable.copyFallbackNodeBytes(
+        primaryHashSlot, fallbackHashTable, fallbackHashSlot, nodeLength);
+
+    // check that the bytes we copied are the same as the original bytes
+    byte[] copiedBytes = primaryHashTable.getBytes(primaryHashSlot, nodeLength);
+    for (int i = 0; i < nodeLength; i++) {
+      assertEquals("byte @ index=" + i, fallbackBytes[i], copiedBytes[i]);
+    }
+  }
+}