From 97d89c661f9e93adb8e0479d8814d8d55f6a5752 Mon Sep 17 00:00:00 2001
From: Dzung Bui
Date: Tue, 23 Jul 2024 01:14:53 +0900
Subject: [PATCH] Refactor FST.saveMetadata() to FSTMetadata.save() (#13549)

* lazily write the FST padding byte

* Also write the pad byte when there is emptyOutput

* add comment

* Make Lucene90BlockTreeTermsWriter to write FST off-heap

* Add change log

* Tidy code & Add comments

* use temp IndexOutput for FST writing

* Use IOUtils to delete files

* Update CHANGES.txt

* Update CHANGES.txt
---
Reviewer notes (placed after the three-dash line, so they are commentary
only and not part of the commit message or of the change itself):

 - Hunk 1 removes the public method FST.saveMetadata(DataOutput) and makes
   FST.save(DataOutput, DataOutput) call metadata.save(metaOut) before
   fstReader.writeTo(out).
 - Hunk 2 adds getEmptyOutput(), getNumBytes() and save(DataOutput) next to
   the existing getVersion(). The body of save(DataOutput) issues the same
   sequence of writes as the removed method: codec header, a 0/1 byte for
   the empty-string output, (when present) a VInt length followed by the
   byte-reversed final output, the input-type byte, then startNode and the
   byte count as VLongs. It reads emptyOutput, inputType, startNode and
   numBytes directly instead of going through `metadata.` / numBytes().
   NOTE(review): this assumes the numBytes field holds the same value the
   old numBytes() call returned -- confirm against the full file.
 - The diffstat shows only FST.java is touched; callers of the removed
   saveMetadata() elsewhere, and the other items listed in the message
   (padding byte, Lucene90BlockTreeTermsWriter, CHANGES.txt), are not part
   of this diff.

 .../java/org/apache/lucene/util/fst/FST.java  | 102 ++++++++++--------
 1 file changed, 55 insertions(+), 47 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
index 17201194da4..ead90dca24c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
@@ -528,56 +528,10 @@ public final class FST implements Accountable {
    * @param out the DataOutput to write the FST bytes to
    */
   public void save(DataOutput metaOut, DataOutput out) throws IOException {
-    saveMetadata(metaOut);
+    metadata.save(metaOut);
     fstReader.writeTo(out);
   }
 
-  /**
-   * Save the metadata to a DataOutput
-   *
-   * @param metaOut the DataOutput to write the metadata to
-   */
-  public void saveMetadata(DataOutput metaOut) throws IOException {
-    CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
-    // TODO: really we should encode this as an arc, arriving
-    // to the root node, instead of special casing here:
-    if (metadata.emptyOutput != null) {
-      // Accepts empty string
-      metaOut.writeByte((byte) 1);
-
-      // Serialize empty-string output:
-      ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
-      outputs.writeFinalOutput(metadata.emptyOutput, ros);
-      byte[] emptyOutputBytes = ros.toArrayCopy();
-      int emptyLen = emptyOutputBytes.length;
-
-      // reverse
-      final int stopAt = emptyLen / 2;
-      int upto = 0;
-      while (upto < stopAt) {
-        final byte b = emptyOutputBytes[upto];
-        emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
-        emptyOutputBytes[emptyLen - upto - 1] = b;
-        upto++;
-      }
-      metaOut.writeVInt(emptyLen);
-      metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
-    } else {
-      metaOut.writeByte((byte) 0);
-    }
-    final byte t;
-    if (metadata.inputType == INPUT_TYPE.BYTE1) {
-      t = 0;
-    } else if (metadata.inputType == INPUT_TYPE.BYTE2) {
-      t = 1;
-    } else {
-      t = 2;
-    }
-    metaOut.writeByte(t);
-    metaOut.writeVLong(metadata.startNode);
-    metaOut.writeVLong(numBytes());
-  }
-
   /** Writes an automaton to a file. */
   public void save(final Path path) throws IOException {
     try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
@@ -1249,5 +1203,59 @@ public final class FST implements Accountable {
     public int getVersion() {
       return version;
     }
+
+    public T getEmptyOutput() {
+      return emptyOutput;
+    }
+
+    public long getNumBytes() {
+      return numBytes;
+    }
+
+    /**
+     * Save the metadata to a DataOutput
+     *
+     * @param metaOut the DataOutput to write the metadata to
+     */
+    public void save(DataOutput metaOut) throws IOException {
+      CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
+      // TODO: really we should encode this as an arc, arriving
+      // to the root node, instead of special casing here:
+      if (emptyOutput != null) {
+        // Accepts empty string
+        metaOut.writeByte((byte) 1);
+
+        // Serialize empty-string output:
+        ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
+        outputs.writeFinalOutput(emptyOutput, ros);
+        byte[] emptyOutputBytes = ros.toArrayCopy();
+        int emptyLen = emptyOutputBytes.length;
+
+        // reverse
+        final int stopAt = emptyLen / 2;
+        int upto = 0;
+        while (upto < stopAt) {
+          final byte b = emptyOutputBytes[upto];
+          emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
+          emptyOutputBytes[emptyLen - upto - 1] = b;
+          upto++;
+        }
+        metaOut.writeVInt(emptyLen);
+        metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
+      } else {
+        metaOut.writeByte((byte) 0);
+      }
+      final byte t;
+      if (inputType == INPUT_TYPE.BYTE1) {
+        t = 0;
+      } else if (inputType == INPUT_TYPE.BYTE2) {
+        t = 1;
+      } else {
+        t = 2;
+      }
+      metaOut.writeByte(t);
+      metaOut.writeVLong(startNode);
+      metaOut.writeVLong(numBytes);
+    }
   }
 }