Fix index out of bounds when writing FST to different metaOut (#12697) (#12698)

* Fix index out of bounds when writing FST to different metaOut (#12697)

* Tidy up code

* Update CHANGES.txt

* Re-add assertion
This commit is contained in:
Dzung Bui 2023-10-20 20:25:04 +09:00 committed by GitHub
parent 343a9e7100
commit 0d8a3e6c4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 5 deletions

View File

@ -216,6 +216,8 @@ Bug Fixes
* GITHUB#11556: HTMLStripCharFilter fails on '>' or '<' characters in attribute values. (Elliot Lin)
* GITHUB#12698: Fix IndexOutOfBoundsException when saving FSTStore-backed FST with different DataOutput for metadata (Anh Dung Bui)
* GITHUB#12642: Ensure #finish only gets called once on the base collector during drill-sideways (Greg Miller)
Build

View File

@ -558,6 +558,8 @@ public final class FST<T> implements Accountable {
bytes.writeTo(out);
} else {
assert fstStore != null;
long numBytes = fstStore.size();
metaOut.writeVLong(numBytes);
fstStore.writeTo(out);
}
}

View File

@ -68,13 +68,19 @@ public final class OnHeapFSTStore implements FSTStore {
if (bytesArray != null) {
return bytesArray.length;
} else {
return bytes.ramBytesUsed();
return bytes.getPosition();
}
}
/**
 * Reports the memory accounted to this store: the fixed {@code BASE_RAM_BYTES_USED} overhead
 * plus either the length of {@code bytesArray} or, when that is null, {@code
 * bytes.ramBytesUsed()}.
 *
 * <p>NOTE(review): this text is a rendered diff with the +/- markers stripped. The single-line
 * return that delegates to {@code size()} looks like the removed pre-change body and the
 * statements after it look like its replacement (the neighbouring hunk changes {@code size()}
 * to return {@code bytes.getPosition()}) -- confirm against the real file.
 *
 * @return the accounted size in bytes
 */
@Override
public long ramBytesUsed() {
// presumably the pre-change implementation (removed line) -- confirm
return BASE_RAM_BYTES_USED + size();
// presumably the post-change implementation: no longer routed through size()
long size = BASE_RAM_BYTES_USED;
if (bytesArray != null) {
size += bytesArray.length;
} else {
size += bytes.ramBytesUsed();
}
return size;
}
@Override
@ -89,12 +95,9 @@ public final class OnHeapFSTStore implements FSTStore {
/**
 * Writes the stored FST bytes to {@code out}: delegates to {@code bytes.writeTo(out)} when
 * {@code bytes} is non-null, otherwise copies the whole {@code bytesArray}.
 *
 * <p>NOTE(review): this text is a rendered diff with the +/- markers stripped. The hunk header
 * (12 lines before, 9 after) suggests the three lines that write a VLong length prefix to
 * {@code out} are the removed ones; the {@code FST} hunk in the same commit writes {@code
 * fstStore.size()} to {@code metaOut} instead -- confirm against the real file.
 *
 * @param out destination for the FST bytes
 * @throws IOException declared for the underlying write calls
 */
@Override
public void writeTo(DataOutput out) throws IOException {
if (bytes != null) {
// presumably removed: length prefix formerly written to the data output
long numBytes = bytes.getPosition();
out.writeVLong(numBytes);
bytes.writeTo(out);
} else {
assert bytesArray != null;
// presumably removed as well: length prefix for the array-backed case
out.writeVLong(bytesArray.length);
out.writeBytes(bytesArray, 0, bytesArray.length);
}
}

View File

@ -21,6 +21,7 @@ import static org.apache.lucene.tests.util.fst.FSTTester.simpleRandomString;
import static org.apache.lucene.tests.util.fst.FSTTester.toIntsRef;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
@ -54,11 +55,13 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.store.MockDirectoryWrapper;
@ -1193,6 +1196,47 @@ public class TestFSTs extends LuceneTestCase {
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
}
// Regression test for https://github.com/apache/lucene/issues/12697
// A reloaded FST must survive being saved with separate DataOutputs for metadata and data
public void testSaveDifferentMetaOut() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final IntsRefBuilder scratch = new IntsRefBuilder();

  // Step 1: compile a tiny three-entry FST
  final FSTCompiler<Long> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  compiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), 22L);
  compiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L);
  compiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
  final FST<Long> compiledFst = compiler.compile();

  // Step 2: save it once; sharing a single DataOutput for meta and data is fine at this point
  final ByteArrayOutputStream combinedBytes = new ByteArrayOutputStream();
  final OutputStreamDataOutput combinedOut = new OutputStreamDataOutput(combinedBytes);
  compiledFst.save(combinedOut, combinedOut);

  // Step 3: reload, so that the FST is backed by an FSTStore rather than a BytesStore
  final ByteArrayDataInput combinedIn = new ByteArrayDataInput(combinedBytes.toByteArray());
  final FST<Long> reloadedFst = new FST<>(combinedIn, combinedIn, outputs);

  // Step 4: save the reloaded FST, this time splitting metadata and data across two outputs
  final ByteArrayOutputStream metadataBytes = new ByteArrayOutputStream();
  final OutputStreamDataOutput metaOut = new OutputStreamDataOutput(metadataBytes);
  final ByteArrayOutputStream dataBytes = new ByteArrayOutputStream();
  final OutputStreamDataOutput dataOut = new OutputStreamDataOutput(dataBytes);
  reloadedFst.save(metaOut, dataOut);

  // Step 5: load from the split streams and check every entry is still resolvable
  final ByteArrayDataInput metaIn = new ByteArrayDataInput(metadataBytes.toByteArray());
  final ByteArrayDataInput dataIn = new ByteArrayDataInput(dataBytes.toByteArray());
  final FST<Long> roundTrippedFst = new FST<>(metaIn, dataIn, outputs);

  assertEquals(
      22L, Util.get(roundTrippedFst, Util.toIntsRef(newBytesRef("aab"), scratch)).longValue());
  assertEquals(
      7L, Util.get(roundTrippedFst, Util.toIntsRef(newBytesRef("aac"), scratch)).longValue());
  assertEquals(
      17L, Util.get(roundTrippedFst, Util.toIntsRef(newBytesRef("ax"), scratch)).longValue());
}
// Make sure raw FST can differentiate between final vs
// non-final end nodes
public void testNonFinalStopNode() throws Exception {