Explain term automaton queries (#12208)

This commit is contained in:
Marcus 2023-04-08 16:09:42 -07:00 committed by GitHub
parent c31017589b
commit 2d7908e3c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 148 additions and 2 deletions

View File

@ -179,6 +179,8 @@ Bug Fixes
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
* GITHUB#12178: Add explanations for TermAutomatonQuery (Marcus Eagan via Patrick Zhai, Mike McCandless, Robert Muir, Mikhail Khludnev)
* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
* GITHUB#12212: Bug fix for a DrillSideways issue where matching hits could occasionally be missed. (Frederic Thevenet)

View File

@ -442,8 +442,44 @@ public class TermAutomatonQuery extends Query implements Accountable {
/**
 * Explains how {@code doc} scores against this query within the given segment.
 *
 * <p>A no-match explanation is returned when there is no scorer for the segment, when advancing
 * the scorer's iterator to {@code doc} lands on a different document, or when none of the
 * scorer's sub-enums is positioned on {@code doc}. Otherwise one detail is produced for every
 * sub-enum positioned on {@code doc}; each detail carries that term's frequency as its value and
 * nests the similarity score computed from that frequency.
 *
 * @param context the segment to explain against
 * @param doc the document id handed to the scorer's iterator
 * @return a match or no-match explanation for {@code doc}
 * @throws IOException propagated from the scorer, iterator, or postings calls made here
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  Scorer scorer = scorer(context);
  if (scorer == null) {
    return Explanation.noMatch("No matching terms in the document");
  }
  if (scorer.iterator().advance(doc) != doc) {
    return Explanation.noMatch("No matching terms in the document");
  }

  float score = scorer.score();
  // Cast once; both accessors below exist only on TermAutomatonScorer.
  TermAutomatonScorer automatonScorer = (TermAutomatonScorer) scorer;
  LeafSimScorer leafSimScorer = automatonScorer.getLeafSimScorer();

  List<Explanation> termExplanations = new ArrayList<>();
  for (EnumAndScorer enumAndScorer : automatonScorer.getOriginalSubsOnDoc()) {
    if (enumAndScorer == null) {
      continue;
    }
    PostingsEnum postingsEnum = enumAndScorer.posEnum;
    if (postingsEnum.docID() != doc) {
      // This term's postings are not positioned on the document being explained.
      continue;
    }
    int termFreq = postingsEnum.freq();
    float termScore = leafSimScorer.score(doc, termFreq);
    termExplanations.add(
        Explanation.match(
            termFreq,
            "term frequency in the document",
            Explanation.match(
                termScore,
                "score for term: " + idToTerm.get(enumAndScorer.termID).utf8ToString())));
  }

  if (termExplanations.isEmpty()) {
    return Explanation.noMatch("No matching terms in the document");
  }

  // NOTE(review): the explanation passed below as the "freq" explanation carries the document
  // score as its value, and its details carry term frequencies rather than scores - confirm
  // that this is what leafSimScorer.explain expects.
  Explanation freqExplanation =
      Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
  return leafSimScorer.explain(doc, freqExplanation);
}
}

View File

@ -53,6 +53,13 @@ class TermAutomatonScorer extends Scorer {
private int docID = -1;
private int freq;
/**
* originalSubsOnDoc is an array of EnumAndScorer instances used to create this
* TermAutomatonScorer. This field is only for explain purposes and should not be used for
* scoring/matching.
*/
private final EnumAndScorer[] originalSubsOnDoc;
public TermAutomatonScorer(
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
throws IOException {
@ -65,6 +72,7 @@ class TermAutomatonScorer extends Scorer {
this.anyTermID = anyTermID;
this.subsOnDoc = new EnumAndScorer[subs.length];
this.positions = new PosState[4];
this.originalSubsOnDoc = subs;
for (int i = 0; i < this.positions.length; i++) {
this.positions[i] = new PosState();
}
@ -345,6 +353,14 @@ class TermAutomatonScorer extends Scorer {
}
}
/**
 * Exposes the {@code originalSubsOnDoc} array to package-level callers.
 *
 * @return the array reference itself, not a copy
 */
EnumAndScorer[] getOriginalSubsOnDoc() {
  return this.originalSubsOnDoc;
}
/**
 * Exposes the {@code docScorer} held by this scorer to package-level callers.
 *
 * @return the {@link LeafSimScorer} stored in {@code docScorer}
 */
LeafSimScorer getLeafSimScorer() {
  return this.docScorer;
}
@Override
public String toString() {
return "TermAutomatonScorer(" + weight + ")";

View File

@ -43,6 +43,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@ -842,6 +843,97 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
IOUtils.close(w, r, dir);
}
/* A TermAutomatonQuery whose rewrite() returns the query itself, so that it is
* never replaced by a simpler primitive query. This lets the explain tests
* below exercise explain() on TermAutomatonQuery itself rather than on
* whatever query it would otherwise have been rewritten to.
*/
private static class CustomTermAutomatonQuery extends TermAutomatonQuery {

  /** Builds the query for {@code field} by delegating straight to the superclass. */
  public CustomTermAutomatonQuery(final String field) {
    super(field);
  }

  /** Performs no rewriting: this very instance is handed back regardless of the searcher. */
  @Override
  public Query rewrite(final IndexSearcher searcher) throws IOException {
    return this;
  }
}
/**
 * Checks that explain reports a no-match when the only indexed document does not contain the
 * single term the automaton accepts.
 */
public void testExplainNoMatchingDocument() throws Exception {
  // Automaton with one transition: start --"xml"--> accept.
  CustomTermAutomatonQuery query = new CustomTermAutomatonQuery("field");
  int startState = query.createState();
  int acceptState = query.createState();
  query.addTransition(startState, acceptState, "xml");
  query.setAccept(acceptState, true);
  query.finish();

  // Index exactly one document, whose text does not include "xml".
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document nonMatchingDoc = new Document();
  nonMatchingDoc.add(newTextField("field", "protobuf", Field.Store.NO));
  writer.addDocument(nonMatchingDoc);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // The custom subclass returns itself from rewrite, so the type check must hold.
  Query rewritten = query.rewrite(searcher);
  assertTrue(rewritten instanceof TermAutomatonQuery);

  TopDocs hits = searcher.search(rewritten, 10);
  assertEquals(0, hits.totalHits.value);

  // Explain the only document added above, addressed here as doc 0.
  Explanation result = searcher.explain(rewritten, 0);
  assertFalse("Explanation should indicate no match", result.isMatch());

  IOUtils.close(writer, reader, directory);
}
// TODO: improve experience of working with explain
/**
 * Checks that explain reports a match for every hit of an automaton accepting "xml" followed by
 * either "json" or "protobuf".
 *
 * <p>Three documents are indexed: "xml json", "xml protobuf" and "xml qux"; two hits are
 * expected. The search and the explain calls both use the rewritten query, so the instance whose
 * type is asserted is the same one that is exercised.
 */
public void testExplainMatchingDocuments() throws Exception {
  // Automaton: init --"xml"--> s1 --"json" | "protobuf"--> s2 (accept).
  CustomTermAutomatonQuery q = new CustomTermAutomatonQuery("field");
  int initState = q.createState();
  int s1 = q.createState();
  int s2 = q.createState();
  q.addTransition(initState, s1, "xml");
  q.addTransition(s1, s2, "json");
  q.addTransition(s1, s2, "protobuf");
  q.setAccept(s2, true);
  q.finish();

  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  Document doc1 = new Document();
  doc1.add(newTextField("field", "xml json", Field.Store.NO));
  w.addDocument(doc1);

  Document doc2 = new Document();
  doc2.add(newTextField("field", "xml protobuf", Field.Store.NO));
  w.addDocument(doc2);

  // Second term "qux" has no transition out of s1, so this document is not expected to hit.
  Document doc3 = new Document();
  doc3.add(newTextField("field", "xml qux", Field.Store.NO));
  w.addDocument(doc3);

  IndexReader r = w.getReader();
  IndexSearcher searcher = newSearcher(r);

  Query rewrittenQuery = q.rewrite(searcher);
  assertTrue(
      "Rewritten query should be an instance of TermAutomatonQuery",
      rewrittenQuery instanceof TermAutomatonQuery);

  // Use the rewritten query from here on, as testExplainNoMatchingDocument does.
  TopDocs topDocs = searcher.search(rewrittenQuery, 10);
  assertEquals(2, topDocs.totalHits.value);

  for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    Explanation explanation = searcher.explain(rewrittenQuery, scoreDoc.doc);
    assertNotNull("Explanation should not be null", explanation);
    assertTrue("Explanation should indicate a match", explanation.isMatch());
  }

  IOUtils.close(w, r, dir);
}
public void testRewritePhraseWithAny() throws Exception {
TermAutomatonQuery q = new TermAutomatonQuery("field");
int initState = q.createState();