Fix weird NRT bug #13353 (#13369)

The issue outlines the problem: when a field has point value dimensions, segment core readers assume that point files will be present.

However, when soft deletes are enabled and a document fails indexing before a point field could be written, that assumption breaks and the NRT reader fails to open. I settled on always flushing a point file if the field info says the field has point values, even if there are no point docs in the buffer.
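
For context, here is a minimal, self-contained sketch of the moving parts involved, using only public APIs; the class name NrtSoftDeletesPointsDemo and the field names ("soft_delete", "body", "int") are illustrative, not taken from the patch. It opens an NRT reader from a writer that is configured for soft deletes and indexes a point field, which is the combination that could fail to open when a document died before its point value was buffered:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class NrtSoftDeletesPointsDemo {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      // Soft-deletes configuration in the same spirit as the new test below:
      // a retention merge policy wrapped around the default merge policy.
      iwc.setSoftDeletesField("soft_delete");
      iwc.setMergePolicy(
          new SoftDeletesRetentionMergePolicy(
              "soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document doc = new Document();
        doc.add(new TextField("body", "some text", Field.Store.NO));
        // The point field makes the segment's field infos declare point dimensions.
        doc.add(new IntPoint("int", 42));
        writer.addDocument(doc);
        // Opening an NRT reader flushes the in-memory segment; with this fix, point files
        // are written whenever the field infos declare point dimensions, even if a failed
        // document left the point buffer empty (the scenario reproduced by the test below).
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
          System.out.println("NRT reader opened with " + reader.numDocs() + " doc(s)");
        }
      }
    }
  }
}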

closes #13353
Benjamin Trent 2024-05-15 09:37:19 -04:00 committed by GitHub
parent 46f1f95ceb
commit b1d3c08619
3 changed files with 37 additions and 1 deletion


@@ -367,6 +367,9 @@ Bug Fixes
 * GITHUB#13366: Disallow NaN and Inf values in scalar quantization and better handle extreme cases. (Ben Trent)
+* GITHUB#13369: Fix NRT opening failure when soft deletes are enabled and the document fails to index before a point
+  field is written (Ben Trent)
 Build
 ---------------------


@@ -375,7 +375,7 @@ final class IndexingChain implements Accountable {
         while (perField != null) {
           if (perField.pointValuesWriter != null) {
             // We could have initialized pointValuesWriter, but failed to write even a single doc
-            if (perField.pointValuesWriter.getNumDocs() > 0) {
+            if (perField.fieldInfo.getPointDimensionCount() > 0) {
               if (pointsWriter == null) {
                 // lazy init
                 PointsFormat fmt = state.segmentInfo.getCodec().pointsFormat();

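In other words, the flush decision in the hunk above now keys off the field schema rather than off whether any point values actually reached the buffer. The helper below is only a hypothetical paraphrase for readability; shouldFlushPoints is not a method in IndexingChain, where the real logic sits inside the points-flush loop:

// Hypothetical helper, not part of Lucene: it only restates the condition changed above.
final class PointsFlushDecision {
  static boolean shouldFlushPoints(org.apache.lucene.index.FieldInfo fieldInfo, int bufferedPointDocs) {
    // Before the fix: flush only when at least one document actually buffered a point
    // value. A document that failed indexing before its point field was written left
    // the buffer empty, so no point files were created even though the field infos
    // declared point dimensions, and opening the NRT reader then failed.
    //   return bufferedPointDocs > 0;

    // After the fix: flush whenever the schema declares point dimensions, so the codec
    // always writes the point files the segment core readers expect to find.
    return fieldInfo.getPointDimensionCount() > 0;
  }
}

The reader side of that contract is what the commit message refers to: SegmentCoreReaders opens a points reader whenever a segment's field infos report point values, so the point files have to be there.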

@@ -47,6 +47,7 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteBuffersDirectory;
@@ -2309,4 +2310,36 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
       DirectoryReader.open(dir).close();
     }
   }
+
+  public void testExceptionJustBeforeFlushWithPointValues() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer =
+        new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
+          @Override
+          public TokenStreamComponents createComponents(String fieldName) {
+            MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+            tokenizer.setEnableChecks(
+                false); // disable workflow checking as we forcefully close() in exceptional cases.
+            TokenStream stream = new CrashingFilter(fieldName, tokenizer);
+            return new TokenStreamComponents(tokenizer, stream);
+          }
+        };
+    IndexWriterConfig iwc =
+        newIndexWriterConfig(analyzer).setCommitOnClose(false).setMaxBufferedDocs(3);
+    MergePolicy mp = iwc.getMergePolicy();
+    iwc.setMergePolicy(
+        new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, mp));
+    IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, iwc, random());
+    Document newdoc = new Document();
+    newdoc.add(newTextField("crash", "do it on token 4", Field.Store.NO));
+    newdoc.add(new IntPoint("int", 42));
+    expectThrows(IOException.class, () -> w.addDocument(newdoc));
+    DirectoryReader r = w.getReader(false, false);
+    LeafReader onlyReader = getOnlyLeafReader(r);
+    // we mark the failed doc as deleted
+    assertEquals(onlyReader.numDeletedDocs(), 1);
+    onlyReader.close();
+    w.close();
+    dir.close();
+  }
 }