From d0e0f6090e9c93b7a10142211c0e50f2d535a907 Mon Sep 17 00:00:00 2001 From: Dzung Bui Date: Tue, 21 Nov 2023 21:26:47 +0900 Subject: [PATCH] Simplify BytesStore operation (#12814) * Simplify BytesStore operations * remove writeInt() --- .../apache/lucene/util/fst/BytesStore.java | 41 ++++------------- .../apache/lucene/util/fst/FSTCompiler.java | 39 ++++++---------- .../org/apache/lucene/util/fst/NodeHash.java | 2 +- .../lucene/util/fst/TestBytesStore.java | 44 +++---------------- 4 files changed, 30 insertions(+), 96 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java b/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java index 900822966a4..11810428d41 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java @@ -48,13 +48,6 @@ class BytesStore extends DataOutput implements FSTReader { nextWrite = blockSize; } - /** Absolute write byte; you must ensure dest is < max position written so far. */ - public void writeByte(long dest, byte b) { - int blockIndex = (int) (dest >> blockBits); - byte[] block = blocks.get(blockIndex); - block[(int) (dest & blockMask)] = b; - } - @Override public void writeByte(byte b) { if (nextWrite == blockSize) { @@ -237,7 +230,7 @@ class BytesStore extends DataOutput implements FSTReader { } /** Copies bytes from this store to a target byte array. */ - public void copyBytes(long src, byte[] dest, int offset, int len) { + public void writeTo(long src, byte[] dest, int offset, int len) { int blockIndex = (int) (src >> blockBits); int upto = (int) (src & blockMask); byte[] block = blocks.get(blockIndex); @@ -257,23 +250,6 @@ class BytesStore extends DataOutput implements FSTReader { } } - /** Writes an int at the absolute position without changing the current pointer. */ - public void writeInt(long pos, int value) { - int blockIndex = (int) (pos >> blockBits); - int upto = (int) (pos & blockMask); - byte[] block = blocks.get(blockIndex); - int shift = 24; - for (int i = 0; i < 4; i++) { - block[upto++] = (byte) (value >> shift); - shift -= 8; - if (upto == blockSize) { - upto = 0; - blockIndex++; - block = blocks.get(blockIndex); - } - } - } - /** Reverse from srcPos, inclusive, to destPos, inclusive. */ public void reverse(long srcPos, long destPos) { assert srcPos < destPos; @@ -313,7 +289,7 @@ class BytesStore extends DataOutput implements FSTReader { } } - public void skipBytes(int len) { + private void skipBytes(int len) { while (len > 0) { int chunk = blockSize - nextWrite; if (len <= chunk) { @@ -337,13 +313,14 @@ class BytesStore extends DataOutput implements FSTReader { return getPosition(); } - /** - * Pos must be less than the max position written so far! Ie, you cannot "grow" the file with - * this! - */ - public void truncate(long newLen) { - assert newLen <= getPosition(); + /** Set the position of this BytesStore, truncating or expanding if needed */ + public void setPosition(long newLen) { assert newLen >= 0; + long oldPosition = getPosition(); + if (newLen > oldPosition) { + skipBytes((int) (newLen - oldPosition)); + return; + } int blockIndex = (int) (newLen >> blockBits); nextWrite = (int) (newLen & blockMask); if (nextWrite == 0) { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java index 56c8f18fc48..725b14614fa 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java @@ -549,7 +549,7 @@ public class FSTCompiler { long destPos = startAddress + headerLen + nodeIn.numArcs * (long) maxBytesPerArc; assert destPos >= srcPos; if (destPos > srcPos) { - bytes.skipBytes((int) (destPos - srcPos)); + bytes.setPosition(destPos); for (int arcIdx = nodeIn.numArcs - 1; arcIdx >= 0; arcIdx--) { destPos -= maxBytesPerArc; int arcLen = numBytesPerArc[arcIdx]; @@ -601,16 +601,16 @@ public class FSTCompiler { srcPos -= srcArcLen; int labelLen = numLabelBytesPerArc[arcIdx]; // Copy the flags. - bytes.copyBytes(srcPos, buffer, bufferOffset, 1); + bytes.writeTo(srcPos, buffer, bufferOffset, 1); // Skip the label, copy the remaining. int remainingArcLen = srcArcLen - 1 - labelLen; if (remainingArcLen != 0) { - bytes.copyBytes(srcPos + 1 + labelLen, buffer, bufferOffset + 1, remainingArcLen); + bytes.writeTo(srcPos + 1 + labelLen, buffer, bufferOffset + 1, remainingArcLen); } if (arcIdx == 0) { // Copy the label of the first arc only. bufferOffset -= labelLen; - bytes.copyBytes(srcPos + 1, buffer, bufferOffset, labelLen); + bytes.writeTo(srcPos + 1, buffer, bufferOffset, labelLen); } } assert bufferOffset == headerMaxLen + numPresenceBytes; @@ -626,34 +626,22 @@ public class FSTCompiler { maxBytesPerArcWithoutLabel); // maxBytesPerArcWithoutLabel instead of maxBytesPerArc. int headerLen = fixedLengthArcsBuffer.getPosition(); - // Prepare the builder byte store. Enlarge or truncate if needed. - long nodeEnd = startAddress + headerLen + numPresenceBytes + totalArcBytes; - long currentPosition = bytes.getPosition(); - if (nodeEnd >= currentPosition) { - bytes.skipBytes((int) (nodeEnd - currentPosition)); - } else { - bytes.truncate(nodeEnd); - } - assert bytes.getPosition() == nodeEnd; - // Write the header. - long writeOffset = startAddress; - bytes.writeBytes(writeOffset, fixedLengthArcsBuffer.getBytes(), 0, headerLen); - writeOffset += headerLen; + bytes.setPosition(startAddress); + bytes.writeBytes(fixedLengthArcsBuffer.getBytes(), 0, headerLen); // Write the presence bits if (continuous == false) { - writePresenceBits(nodeIn, writeOffset, numPresenceBytes); - writeOffset += numPresenceBytes; + writePresenceBits(nodeIn); + assert bytes.getPosition() == startAddress + headerLen + numPresenceBytes; } // Write the first label and the arcs. - bytes.writeBytes(writeOffset, fixedLengthArcsBuffer.getBytes(), bufferOffset, totalArcBytes); + bytes.writeBytes(fixedLengthArcsBuffer.getBytes(), bufferOffset, totalArcBytes); + assert bytes.getPosition() == startAddress + headerLen + numPresenceBytes + totalArcBytes; } - private void writePresenceBits( - FSTCompiler.UnCompiledNode nodeIn, long dest, int numPresenceBytes) { - long bytePos = dest; + private void writePresenceBits(FSTCompiler.UnCompiledNode nodeIn) { byte presenceBits = 1; // The first arc is always present. int presenceIndex = 0; int previousLabel = nodeIn.arcs[0].label; @@ -662,7 +650,7 @@ public class FSTCompiler { assert label > previousLabel; presenceIndex += label - previousLabel; while (presenceIndex >= Byte.SIZE) { - bytes.writeByte(bytePos++, presenceBits); + bytes.writeByte(presenceBits); presenceBits = 0; presenceIndex -= Byte.SIZE; } @@ -673,8 +661,7 @@ public class FSTCompiler { assert presenceIndex == (nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label) % 8; assert presenceBits != 0; // The last byte is not 0. assert (presenceBits & (1 << presenceIndex)) != 0; // The last arc is always present. - bytes.writeByte(bytePos++, presenceBits); - assert bytePos - dest == numPresenceBytes; + bytes.writeByte(presenceBits); } private void freezeTail(int prefixLenPlus1) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java index faf258afde0..53640f59ffd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java @@ -138,7 +138,7 @@ final class NodeHash { // at 0: assert nodeAddress != FST.FINAL_END_NODE && nodeAddress != FST.NON_FINAL_END_NODE; byte[] buf = new byte[Math.toIntExact(nodeAddress - startAddress + 1)]; - fstCompiler.bytes.copyBytes(startAddress, buf, 0, buf.length); + fstCompiler.bytes.writeTo(startAddress, buf, 0, buf.length); primaryTable.setNodeAddress(hashSlot, nodeAddress); primaryTable.copyNodeBytes(hashSlot, buf); diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java index 6190b903c7d..b550fe81fa9 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java @@ -45,7 +45,7 @@ public class TestBytesStore extends LuceneTestCase { int pos = 0; while (pos < numBytes) { - int op = random().nextInt(8); + int op = random().nextInt(6); if (VERBOSE) { System.out.println(" cycle pos=" + pos); } @@ -79,24 +79,6 @@ public class TestBytesStore extends LuceneTestCase { break; case 2: - { - // write int @ absolute pos - if (pos > 4) { - int x = random().nextInt(); - int randomPos = random().nextInt(pos - 4); - if (VERBOSE) { - System.out.println(" abs writeInt pos=" + randomPos + " x=" + x); - } - bytes.writeInt(randomPos, x); - expected[randomPos++] = (byte) (x >> 24); - expected[randomPos++] = (byte) (x >> 16); - expected[randomPos++] = (byte) (x >> 8); - expected[randomPos++] = (byte) x; - } - } - break; - - case 3: { // reverse bytes if (pos > 1) { @@ -125,7 +107,7 @@ public class TestBytesStore extends LuceneTestCase { } break; - case 4: + case 3: { // abs write random byte[] if (pos > 2) { @@ -148,7 +130,7 @@ public class TestBytesStore extends LuceneTestCase { } break; - case 5: + case 4: { // copyBytes if (pos > 1) { @@ -164,7 +146,7 @@ public class TestBytesStore extends LuceneTestCase { } break; - case 6: + case 5: { // skip int len = random().nextInt(Math.min(100, numBytes - pos)); @@ -174,7 +156,7 @@ public class TestBytesStore extends LuceneTestCase { } pos += len; - bytes.skipBytes(len); + bytes.setPosition(pos); // NOTE: must fill in zeros in case truncate was // used, else we get false fails: @@ -184,18 +166,6 @@ public class TestBytesStore extends LuceneTestCase { } } break; - - case 7: - { - // absWriteByte - if (pos > 0) { - int dest = random().nextInt(pos); - byte b = (byte) random().nextInt(256); - expected[dest] = b; - bytes.writeByte(dest, b); - } - break; - } } assertEquals(pos, bytes.getPosition()); @@ -203,7 +173,7 @@ public class TestBytesStore extends LuceneTestCase { if (pos > 0 && random().nextInt(50) == 17) { // truncate int len = TestUtil.nextInt(random(), 1, Math.min(pos, 100)); - bytes.truncate(pos - len); + bytes.setPosition(pos - len); pos -= len; Arrays.fill(expected, pos, pos + len, (byte) 0); if (VERBOSE) { @@ -249,7 +219,7 @@ public class TestBytesStore extends LuceneTestCase { final int blockBits = TestUtil.nextInt(random(), 8, 15); final BytesStore o = new BytesStore(blockBits); o.copyBytes(in, len); - o.copyBytes(0, bytesout, 0, len); + o.writeTo(0, bytesout, 0, len); assertArrayEquals( ArrayUtil.copyOfSubArray(bytesout, 0, len), ArrayUtil.copyOfSubArray(bytes, offset, offset + len));