From 20d5de448a739beb85e380d153fb13bf1817a8d6 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 8 Nov 2023 11:14:32 -0500 Subject: [PATCH] Revert "Copy directly between 2 ByteBlockPool to avoid double-copy (#12778)" This reverts commit efd68f8165210c2c649964a758bb941fe31421c6. --- .../org/apache/lucene/util/ByteBlockPool.java | 41 ----------------- .../org/apache/lucene/util/fst/NodeHash.java | 46 ++++++++++--------- .../apache/lucene/util/TestByteBlockPool.java | 36 ++------------- 3 files changed, 27 insertions(+), 96 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 889e03ff5a6..73203c07fbc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -234,46 +234,6 @@ public final class ByteBlockPool implements Accountable { append(bytes.bytes, bytes.offset, bytes.length); } - /** - * Append the bytes from a source {@link ByteBlockPool} at a given offset and length - * - * @param srcPool the source pool to copy from - * @param srcOffset the source pool offset - * @param length the number of bytes to copy - */ - public void append(ByteBlockPool srcPool, long srcOffset, int length) { - int bytesLeft = length; - while (bytesLeft > 0) { - int bufferLeft = BYTE_BLOCK_SIZE - byteUpto; - if (bytesLeft < bufferLeft) { // fits within current buffer - appendBytesSingleBuffer(srcPool, srcOffset, bytesLeft); - break; - } else { // fill up this buffer and move to next one - if (bufferLeft > 0) { - appendBytesSingleBuffer(srcPool, srcOffset, bufferLeft); - bytesLeft -= bufferLeft; - srcOffset += bufferLeft; - } - nextBuffer(); - } - } - } - - // copy from source pool until no bytes left. length must be fit within the current head buffer - private void appendBytesSingleBuffer(ByteBlockPool srcPool, long srcOffset, int length) { - assert length <= BYTE_BLOCK_SIZE - byteUpto; - // doing a loop as the bytes to copy might span across multiple byte[] in srcPool - while (length > 0) { - byte[] srcBytes = srcPool.buffers[Math.toIntExact(srcOffset >> BYTE_BLOCK_SHIFT)]; - int srcPos = Math.toIntExact(srcOffset & BYTE_BLOCK_MASK); - int bytesToCopy = Math.min(length, BYTE_BLOCK_SIZE - srcPos); - System.arraycopy(srcBytes, srcPos, buffer, byteUpto, bytesToCopy); - length -= bytesToCopy; - srcOffset += bytesToCopy; - byteUpto += bytesToCopy; - } - } - /** * Append the provided byte array at the current position. * @@ -323,7 +283,6 @@ public final class ByteBlockPool implements Accountable { int pos = (int) (offset & BYTE_BLOCK_MASK); while (bytesLeft > 0) { byte[] buffer = buffers[bufferIndex++]; - assert buffer != null; int chunk = Math.min(bytesLeft, BYTE_BLOCK_SIZE - pos); System.arraycopy(buffer, pos, bytes, bytesOffset, chunk); bytesOffset += chunk; diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java index 59bd2c45b33..690741682a6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java @@ -123,9 +123,11 @@ final class NodeHash { assert lastFallbackHashSlot != -1 && lastFallbackNodeLength != -1; // it was already in fallback -- promote to primary - primaryTable.setNodeAddress(hashSlot, nodeAddress); - primaryTable.copyFallbackNodeBytes( - hashSlot, fallbackTable, lastFallbackHashSlot, lastFallbackNodeLength); + // TODO: Copy directly between 2 ByteBlockPool to avoid double-copy + primaryTable.setNode( + hashSlot, + nodeAddress, + fallbackTable.getBytes(lastFallbackHashSlot, lastFallbackNodeLength)); } else { // not in fallback either -- freeze & add the incoming node @@ -140,8 +142,7 @@ final class NodeHash { byte[] buf = new byte[Math.toIntExact(nodeAddress - startAddress + 1)]; fstCompiler.bytes.copyBytes(startAddress, buf, 0, buf.length); - primaryTable.setNodeAddress(hashSlot, nodeAddress); - primaryTable.copyNodeBytes(hashSlot, buf); + primaryTable.setNode(hashSlot, nodeAddress, buf); // confirm frozen hash and unfrozen hash are the same assert primaryTable.hash(nodeAddress, hashSlot) == hash @@ -262,6 +263,21 @@ final class NodeHash { bytesReader = new ByteBlockPoolReverseBytesReader(copiedNodes); } + /** + * Get the copied bytes at the provided hash slot + * + * @param hashSlot the hash slot to read from + * @param length the number of bytes to read + * @return the copied byte array + */ + public byte[] getBytes(long hashSlot, int length) { + long address = copiedNodeAddress.get(hashSlot); + assert address - length + 1 >= 0; + byte[] buf = new byte[length]; + copiedNodes.readBytes(address - length + 1, buf, 0, length); + return buf; + } + /** * Get the node address from the provided hash slot * @@ -273,35 +289,21 @@ final class NodeHash { } /** - * Set the node address from the provided hash slot + * Set the node address and bytes from the provided hash slot * * @param hashSlot the hash slot to write to * @param nodeAddress the node address + * @param bytes the node bytes to be copied */ - public void setNodeAddress(long hashSlot, long nodeAddress) { + public void setNode(long hashSlot, long nodeAddress, byte[] bytes) { assert fstNodeAddress.get(hashSlot) == 0; fstNodeAddress.set(hashSlot, nodeAddress); count++; - } - /** copy the node bytes from the FST */ - private void copyNodeBytes(long hashSlot, byte[] bytes) { - assert copiedNodeAddress.get(hashSlot) == 0; copiedNodes.append(bytes); // write the offset, which points to the last byte of the node we copied since we later read // this node in reverse - copiedNodeAddress.set(hashSlot, copiedNodes.getPosition() - 1); - } - - /** promote the node bytes from the fallback table */ - private void copyFallbackNodeBytes( - long hashSlot, PagedGrowableHash fallbackTable, long fallbackHashSlot, int nodeLength) { assert copiedNodeAddress.get(hashSlot) == 0; - long fallbackAddress = fallbackTable.copiedNodeAddress.get(fallbackHashSlot); - assert fallbackAddress - nodeLength + 1 >= 0; - copiedNodes.append(fallbackTable.copiedNodes, fallbackAddress, nodeLength); - // write the offset, which points to the last byte of the node we copied since we later read - // this node in reverse copiedNodeAddress.set(hashSlot, copiedNodes.getPosition() - 1); } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java index 7e97f547ac5..c7c4e80872d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java @@ -16,46 +16,16 @@ */ package org.apache.lucene.util; -import com.carrotsearch.randomizedtesting.generators.RandomBytes; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Random; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; public class TestByteBlockPool extends LuceneTestCase { - public void testAppendFromOtherPool() { - Random random = random(); - - ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator()); - final int numBytes = atLeast(2 << 16); - byte[] bytes = RandomBytes.randomBytesOfLength(random, numBytes); - pool.append(bytes); - - ByteBlockPool anotherPool = new ByteBlockPool(new ByteBlockPool.DirectAllocator()); - byte[] existingBytes = new byte[atLeast(500)]; - anotherPool.append(existingBytes); - - // now slice and append to another pool - int offset = TestUtil.nextInt(random, 1, 2 << 15); - int length = bytes.length - offset; - if (random.nextBoolean()) { - length = TestUtil.nextInt(random, 1, length); - } - anotherPool.append(pool, offset, length); - - assertEquals(existingBytes.length + length, anotherPool.getPosition()); - - byte[] results = new byte[length]; - anotherPool.readBytes(existingBytes.length, results, 0, results.length); - for (int i = 0; i < length; i++) { - assertEquals("byte @ index=" + i, bytes[offset + i], results[i]); - } - } - - public void testReadAndWrite() { + public void testReadAndWrite() throws IOException { Counter bytesUsed = Counter.newCounter(); ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); pool.nextBuffer(); @@ -104,7 +74,7 @@ public class TestByteBlockPool extends LuceneTestCase { } } - public void testLargeRandomBlocks() { + public void testLargeRandomBlocks() throws IOException { Counter bytesUsed = Counter.newCounter(); ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); pool.nextBuffer();