mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 11:35:14 +00:00
Refactor FST.saveMetadata() to FSTMetadata.save() (#13549)
* lazily write the FST padding byte
* Also write the pad byte when there is emptyOutput
* add comment
* Make Lucene90BlockTreeTermsWriter write the FST off-heap
* Add change log
* Tidy code & Add comments
* use temp IndexOutput for FST writing
* Use IOUtils to delete files
* Update CHANGES.txt
* Update CHANGES.txt
This commit is contained in:
parent
af9a2b9803
commit
97d89c661f
@ -528,56 +528,10 @@ public final class FST<T> implements Accountable {
|
|||||||
* @param out the DataOutput to write the FST bytes to
|
* @param out the DataOutput to write the FST bytes to
|
||||||
*/
|
*/
|
||||||
public void save(DataOutput metaOut, DataOutput out) throws IOException {
|
public void save(DataOutput metaOut, DataOutput out) throws IOException {
|
||||||
saveMetadata(metaOut);
|
metadata.save(metaOut);
|
||||||
fstReader.writeTo(out);
|
fstReader.writeTo(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Save the metadata to a DataOutput
|
|
||||||
*
|
|
||||||
* @param metaOut the DataOutput to write the metadata to
|
|
||||||
*/
|
|
||||||
public void saveMetadata(DataOutput metaOut) throws IOException {
|
|
||||||
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
|
|
||||||
// TODO: really we should encode this as an arc, arriving
|
|
||||||
// to the root node, instead of special casing here:
|
|
||||||
if (metadata.emptyOutput != null) {
|
|
||||||
// Accepts empty string
|
|
||||||
metaOut.writeByte((byte) 1);
|
|
||||||
|
|
||||||
// Serialize empty-string output:
|
|
||||||
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
|
|
||||||
outputs.writeFinalOutput(metadata.emptyOutput, ros);
|
|
||||||
byte[] emptyOutputBytes = ros.toArrayCopy();
|
|
||||||
int emptyLen = emptyOutputBytes.length;
|
|
||||||
|
|
||||||
// reverse
|
|
||||||
final int stopAt = emptyLen / 2;
|
|
||||||
int upto = 0;
|
|
||||||
while (upto < stopAt) {
|
|
||||||
final byte b = emptyOutputBytes[upto];
|
|
||||||
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
|
|
||||||
emptyOutputBytes[emptyLen - upto - 1] = b;
|
|
||||||
upto++;
|
|
||||||
}
|
|
||||||
metaOut.writeVInt(emptyLen);
|
|
||||||
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
|
|
||||||
} else {
|
|
||||||
metaOut.writeByte((byte) 0);
|
|
||||||
}
|
|
||||||
final byte t;
|
|
||||||
if (metadata.inputType == INPUT_TYPE.BYTE1) {
|
|
||||||
t = 0;
|
|
||||||
} else if (metadata.inputType == INPUT_TYPE.BYTE2) {
|
|
||||||
t = 1;
|
|
||||||
} else {
|
|
||||||
t = 2;
|
|
||||||
}
|
|
||||||
metaOut.writeByte(t);
|
|
||||||
metaOut.writeVLong(metadata.startNode);
|
|
||||||
metaOut.writeVLong(numBytes());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Writes an automaton to a file. */
|
/** Writes an automaton to a file. */
|
||||||
public void save(final Path path) throws IOException {
|
public void save(final Path path) throws IOException {
|
||||||
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
|
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
|
||||||
@ -1249,5 +1203,59 @@ public final class FST<T> implements Accountable {
|
|||||||
public int getVersion() {
|
public int getVersion() {
|
||||||
return version;
|
return version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public T getEmptyOutput() {
|
||||||
|
return emptyOutput;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getNumBytes() {
|
||||||
|
return numBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save the metadata to a DataOutput
|
||||||
|
*
|
||||||
|
* @param metaOut the DataOutput to write the metadata to
|
||||||
|
*/
|
||||||
|
public void save(DataOutput metaOut) throws IOException {
|
||||||
|
CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
|
||||||
|
// TODO: really we should encode this as an arc, arriving
|
||||||
|
// to the root node, instead of special casing here:
|
||||||
|
if (emptyOutput != null) {
|
||||||
|
// Accepts empty string
|
||||||
|
metaOut.writeByte((byte) 1);
|
||||||
|
|
||||||
|
// Serialize empty-string output:
|
||||||
|
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
|
||||||
|
outputs.writeFinalOutput(emptyOutput, ros);
|
||||||
|
byte[] emptyOutputBytes = ros.toArrayCopy();
|
||||||
|
int emptyLen = emptyOutputBytes.length;
|
||||||
|
|
||||||
|
// reverse
|
||||||
|
final int stopAt = emptyLen / 2;
|
||||||
|
int upto = 0;
|
||||||
|
while (upto < stopAt) {
|
||||||
|
final byte b = emptyOutputBytes[upto];
|
||||||
|
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
|
||||||
|
emptyOutputBytes[emptyLen - upto - 1] = b;
|
||||||
|
upto++;
|
||||||
|
}
|
||||||
|
metaOut.writeVInt(emptyLen);
|
||||||
|
metaOut.writeBytes(emptyOutputBytes, 0, emptyLen);
|
||||||
|
} else {
|
||||||
|
metaOut.writeByte((byte) 0);
|
||||||
|
}
|
||||||
|
final byte t;
|
||||||
|
if (inputType == INPUT_TYPE.BYTE1) {
|
||||||
|
t = 0;
|
||||||
|
} else if (inputType == INPUT_TYPE.BYTE2) {
|
||||||
|
t = 1;
|
||||||
|
} else {
|
||||||
|
t = 2;
|
||||||
|
}
|
||||||
|
metaOut.writeByte(t);
|
||||||
|
metaOut.writeVLong(startNode);
|
||||||
|
metaOut.writeVLong(numBytes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user