mirror of https://github.com/apache/lucene.git
Lazily write the FST padding byte (#12981)
* lazily write the FST padding byte
* Also write the pad byte when there is emptyOutput
* add comment
* Add more comments
This commit is contained in:
parent
09837bae73
commit
701619d35a
|
@ -106,6 +106,9 @@ public class FSTCompiler<T> {
|
||||||
|
|
||||||
private final IntsRefBuilder lastInput = new IntsRefBuilder();
|
private final IntsRefBuilder lastInput = new IntsRefBuilder();
|
||||||
|
|
||||||
|
// indicates whether the padding byte has not been written yet
|
||||||
|
private boolean paddingBytePending;
|
||||||
|
|
||||||
// NOTE: cutting this over to ArrayList instead loses ~6%
|
// NOTE: cutting this over to ArrayList instead loses ~6%
|
||||||
// in build performance on 9.8M Wikipedia terms; so we
|
// in build performance on 9.8M Wikipedia terms; so we
|
||||||
// left this as an array:
|
// left this as an array:
|
||||||
|
@ -160,15 +163,14 @@ public class FSTCompiler<T> {
|
||||||
boolean allowFixedLengthArcs,
|
boolean allowFixedLengthArcs,
|
||||||
DataOutput dataOutput,
|
DataOutput dataOutput,
|
||||||
float directAddressingMaxOversizingFactor,
|
float directAddressingMaxOversizingFactor,
|
||||||
int version)
|
int version) {
|
||||||
throws IOException {
|
|
||||||
this.allowFixedLengthArcs = allowFixedLengthArcs;
|
this.allowFixedLengthArcs = allowFixedLengthArcs;
|
||||||
this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
|
this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
// pad: ensure no node gets address 0 which is reserved to mean
|
// pad: ensure no node gets address 0 which is reserved to mean
|
||||||
// the stop state w/ no arcs
|
// the stop state w/ no arcs. The actual byte will be written lazily
|
||||||
dataOutput.writeByte((byte) 0);
|
|
||||||
numBytesWritten++;
|
numBytesWritten++;
|
||||||
|
paddingBytePending = true;
|
||||||
this.dataOutput = dataOutput;
|
this.dataOutput = dataOutput;
|
||||||
fst =
|
fst =
|
||||||
new FST<>(
|
new FST<>(
|
||||||
|
@ -340,7 +342,7 @@ public class FSTCompiler<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a new {@link FSTCompiler}. */
|
/** Creates a new {@link FSTCompiler}. */
|
||||||
public FSTCompiler<T> build() throws IOException {
|
public FSTCompiler<T> build() {
|
||||||
// create a default DataOutput if not specified
|
// create a default DataOutput if not specified
|
||||||
if (dataOutput == null) {
|
if (dataOutput == null) {
|
||||||
dataOutput = getOnHeapReaderWriter(15);
|
dataOutput = getOnHeapReaderWriter(15);
|
||||||
|
@ -548,6 +550,10 @@ public class FSTCompiler<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
reverseScratchBytes();
|
reverseScratchBytes();
|
||||||
|
// write the padding byte if needed
|
||||||
|
if (paddingBytePending) {
|
||||||
|
writePaddingByte();
|
||||||
|
}
|
||||||
scratchBytes.writeTo(dataOutput);
|
scratchBytes.writeTo(dataOutput);
|
||||||
numBytesWritten += scratchBytes.getPosition();
|
numBytesWritten += scratchBytes.getPosition();
|
||||||
|
|
||||||
|
@ -555,6 +561,16 @@ public class FSTCompiler<T> {
|
||||||
return numBytesWritten - 1;
|
return numBytesWritten - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the padding byte, ensuring no node gets address 0, which is reserved to mean the stop state
|
||||||
|
* w/ no arcs
|
||||||
|
*/
|
||||||
|
private void writePaddingByte() throws IOException {
|
||||||
|
assert paddingBytePending;
|
||||||
|
dataOutput.writeByte((byte) 0);
|
||||||
|
paddingBytePending = false;
|
||||||
|
}
|
||||||
|
|
||||||
private void writeLabel(DataOutput out, int v) throws IOException {
|
private void writeLabel(DataOutput out, int v) throws IOException {
|
||||||
assert v >= 0 : "v=" + v;
|
assert v >= 0 : "v=" + v;
|
||||||
if (fst.metadata.inputType == INPUT_TYPE.BYTE1) {
|
if (fst.metadata.inputType == INPUT_TYPE.BYTE1) {
|
||||||
|
@ -963,7 +979,11 @@ public class FSTCompiler<T> {
|
||||||
freezeTail(0);
|
freezeTail(0);
|
||||||
if (root.numArcs == 0) {
|
if (root.numArcs == 0) {
|
||||||
if (fst.metadata.emptyOutput == null) {
|
if (fst.metadata.emptyOutput == null) {
|
||||||
|
// return null for completely empty FST which accepts nothing
|
||||||
return null;
|
return null;
|
||||||
|
} else {
|
||||||
|
// we haven't written the padding byte so far, but the FST is still valid
|
||||||
|
writePaddingByte();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue