Refactor FST.saveMetadata() to FSTMetadata.save() (#13549)

* lazily write the FST padding byte

* Also write the pad byte when there is emptyOutput

* add comment

* Make Lucene90BlockTreeTermsWriter write the FST off-heap

* Add change log

* Tidy code & Add comments

* use temp IndexOutput for FST writing

* Use IOUtils to delete files

* Update CHANGES.txt

* Update CHANGES.txt
This commit is contained in:
Dzung Bui 2024-07-23 01:14:53 +09:00 committed by GitHub
parent af9a2b9803
commit 97d89c661f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 55 additions and 47 deletions

View File

@ -528,56 +528,10 @@ public final class FST<T> implements Accountable {
* @param out the DataOutput to write the FST bytes to
*/
public void save(DataOutput metaOut, DataOutput out) throws IOException {
  // Write the metadata exactly once. metadata.save(metaOut) emits the codec header, the
  // empty-output section, the input-type byte, the start node and the byte count; also calling
  // saveMetadata(metaOut) here would write that same sequence to metaOut a second time.
  metadata.save(metaOut);
  // Then write the FST content itself to the separate data output.
  fstReader.writeTo(out);
}
/**
 * Writes the FST metadata to a DataOutput.
 *
 * <p>The fields are written in this order: the codec header, a flag byte telling whether an
 * empty-string output exists (followed, when it does, by its length and its serialized bytes in
 * reversed order), the input-type byte, the start node, and the value of {@code numBytes()}.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException if writing to {@code metaOut} fails
 */
public void saveMetadata(DataOutput metaOut) throws IOException {
  CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);

  // TODO: really we should encode this as an arc, arriving
  // to the root node, instead of special casing here:
  if (metadata.emptyOutput == null) {
    // Flag 0: no empty-string output to serialize
    metaOut.writeByte((byte) 0);
  } else {
    // Flag 1: the empty string is accepted
    metaOut.writeByte((byte) 1);

    // Serialize the empty-string output into a scratch buffer first
    ByteBuffersDataOutput scratch = new ByteBuffersDataOutput();
    outputs.writeFinalOutput(metadata.emptyOutput, scratch);
    final byte[] serialized = scratch.toArrayCopy();
    final int length = serialized.length;

    // Reverse the bytes in place, walking inwards from both ends
    for (int lo = 0, hi = length - 1; lo < hi; lo++, hi--) {
      final byte swap = serialized[lo];
      serialized[lo] = serialized[hi];
      serialized[hi] = swap;
    }

    metaOut.writeVInt(length);
    metaOut.writeBytes(serialized, 0, length);
  }

  // Input type is encoded as 0 for BYTE1, 1 for BYTE2, and 2 for anything else
  final byte typeCode =
      metadata.inputType == INPUT_TYPE.BYTE1
          ? (byte) 0
          : metadata.inputType == INPUT_TYPE.BYTE2 ? (byte) 1 : (byte) 2;
  metaOut.writeByte(typeCode);

  metaOut.writeVLong(metadata.startNode);
  metaOut.writeVLong(numBytes());
}
/** Writes an automaton to a file. */
public void save(final Path path) throws IOException {
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
@ -1249,5 +1203,59 @@ public final class FST<T> implements Accountable {
public int getVersion() {
// Plain accessor: hands back the version field unchanged.
// NOTE(review): presumably the format version this metadata was read or created with — the
// field's declaration and assignment are outside this view, confirm there.
return version;
}
/**
 * Returns the output associated with the empty string, or {@code null} when there is none.
 *
 * <p>{@link #save(DataOutput)} writes a {@code 0} flag byte when this is {@code null}, and a
 * {@code 1} flag byte followed by the serialized output otherwise.
 */
public T getEmptyOutput() {
  return this.emptyOutput;
}
/**
 * Returns the {@code numBytes} value held by this metadata; {@link #save(DataOutput)} writes it
 * as the final vlong of the metadata.
 *
 * <p>NOTE(review): presumably the size in bytes of the FST body — confirm where the field is
 * assigned.
 */
public long getNumBytes() {
  return this.numBytes;
}
/**
 * Writes this metadata to a DataOutput.
 *
 * <p>The fields are written in this order: the codec header, a flag byte telling whether an
 * empty-string output exists (followed, when it does, by its length and its serialized bytes in
 * reversed order), the input-type byte, the start node, and {@code numBytes}.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException if writing to {@code metaOut} fails
 */
public void save(DataOutput metaOut) throws IOException {
  CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);

  // TODO: really we should encode this as an arc, arriving
  // to the root node, instead of special casing here:
  if (emptyOutput == null) {
    // Flag 0: no empty-string output to serialize
    metaOut.writeByte((byte) 0);
  } else {
    // Flag 1: the empty string is accepted
    metaOut.writeByte((byte) 1);

    // Serialize the empty-string output into a scratch buffer first
    ByteBuffersDataOutput scratch = new ByteBuffersDataOutput();
    outputs.writeFinalOutput(emptyOutput, scratch);
    final byte[] serialized = scratch.toArrayCopy();
    final int length = serialized.length;

    // Reverse the bytes in place, walking inwards from both ends
    for (int lo = 0, hi = length - 1; lo < hi; lo++, hi--) {
      final byte swap = serialized[lo];
      serialized[lo] = serialized[hi];
      serialized[hi] = swap;
    }

    metaOut.writeVInt(length);
    metaOut.writeBytes(serialized, 0, length);
  }

  // Input type is encoded as 0 for BYTE1, 1 for BYTE2, and 2 for anything else
  final byte typeCode =
      inputType == INPUT_TYPE.BYTE1
          ? (byte) 0
          : inputType == INPUT_TYPE.BYTE2 ? (byte) 1 : (byte) 2;
  metaOut.writeByte(typeCode);

  metaOut.writeVLong(startNode);
  metaOut.writeVLong(numBytes);
}
}
}