Fix handling of ghost fields in string sorts. (#11792)

Introduction of dynamic pruning for string sorts (#11669) introduced a bug with string sorts and ghost fields, triggering a `NullPointerException` because the code assumes that `LeafReader#terms` is not null if the field is indexed according to field infos. This commit fixes the issue and adds tests for ghost fields across all sort types. Hopefully we can simplify and remove the null check in the future when we improve handling of ghost fields (#11393).
2025-02-07 02:28:49 +00:00 · 2022-09-20 13:49:52 +02:00 · 2022-09-20 13:49:52 +02:00 · 6c46662b43
commit 6c46662b43
parent 00a8112d97
2 changed files with 194 additions and 1 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@ -251,7 +251,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
          enableSkipping = false;
        } else {
          Terms terms = context.reader().terms(field);
-          dense = terms.getDocCount() == context.reader().maxDoc();
+          dense = terms != null && terms.getDocCount() == context.reader().maxDoc();
          if (dense || topValue != null) {
            enableSkipping = true;
          } else if (reverse == sortMissingLast) {
--- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java
@ -20,11 +20,21 @@ import java.io.IOException;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.util.LuceneTestCase;
@ -847,4 +857,187 @@ public class TestSort extends LuceneTestCase {
      assertSame(sort, sort.rewrite(searcher));
    }
  }
+
+  // Ghost tests make sure that sorting can cope with segments that are missing values while their
+  // FieldInfo reports that the field exists.
+
+  public void testStringGhost() throws IOException {
+    doTestStringGhost(true);
+    doTestStringGhost(false);
+  }
+
+  private void doTestStringGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new SortedDocValuesField("value", newBytesRef("foo")));
+    if (indexed) {
+      doc.add(newStringField("value", "foo", Field.Store.YES));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.STRING));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertNull(((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertNull(((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testIntGhost() throws IOException {
+    doTestIntGhost(true);
+    doTestIntGhost(false);
+  }
+
+  private void doTestIntGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("value", 3));
+    if (indexed) {
+      doc.add(new IntPoint("value", 3));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.INT));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testLongGhost() throws IOException {
+    doTestLongGhost(true);
+    doTestLongGhost(false);
+  }
+
+  private void doTestLongGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("value", 3L));
+    if (indexed) {
+      doc.add(new LongPoint("value", 3L));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0L, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testDoubleGhost() throws IOException {
+    doTestDoubleGhost(true);
+    doTestDoubleGhost(false);
+  }
+
+  private void doTestDoubleGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new DoubleDocValuesField("value", 1.25));
+    if (indexed) {
+      doc.add(new DoublePoint("value", 1.25));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0.0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0.0, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testFloatGhost() throws IOException {
+    doTestFloatGhost(true);
+    doTestFloatGhost(false);
+  }
+
+  private void doTestFloatGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new FloatDocValuesField("value", 1.25f));
+    if (indexed) {
+      doc.add(new FloatPoint("value", 1.25f));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0.0f, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0.0f, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
 }