mirror of https://github.com/apache/lucene.git
Explain term automaton queries (#12208)
This commit is contained in:
parent
c31017589b
commit
2d7908e3c9
|
@ -179,6 +179,8 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
|
||||
|
||||
* GITHUB#12178: Add explanations for TermAutomatonQuery (Marcus Eagan via Patrick Zhai, Mike McCandless, Robert Muir, Mikhail Khludnev)
|
||||
|
||||
* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
|
||||
|
||||
* GITHUB#12212: Bug fix for a DrillSideways issue where matching hits could occasionally be missed. (Frederic Thevenet)
|
||||
|
|
|
@ -442,8 +442,44 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
|||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
// TODO
|
||||
return null;
|
||||
Scorer scorer = scorer(context);
|
||||
if (scorer == null) {
|
||||
return Explanation.noMatch("No matching terms in the document");
|
||||
}
|
||||
|
||||
int advancedDoc = scorer.iterator().advance(doc);
|
||||
if (advancedDoc != doc) {
|
||||
return Explanation.noMatch("No matching terms in the document");
|
||||
}
|
||||
|
||||
float score = scorer.score();
|
||||
LeafSimScorer leafSimScorer = ((TermAutomatonScorer) scorer).getLeafSimScorer();
|
||||
EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc();
|
||||
|
||||
List<Explanation> termExplanations = new ArrayList<>();
|
||||
for (EnumAndScorer enumAndScorer : originalSubsOnDoc) {
|
||||
if (enumAndScorer != null) {
|
||||
PostingsEnum postingsEnum = enumAndScorer.posEnum;
|
||||
if (postingsEnum.docID() == doc) {
|
||||
float termScore = leafSimScorer.score(doc, postingsEnum.freq());
|
||||
termExplanations.add(
|
||||
Explanation.match(
|
||||
postingsEnum.freq(),
|
||||
"term frequency in the document",
|
||||
Explanation.match(
|
||||
termScore,
|
||||
"score for term: " + idToTerm.get(enumAndScorer.termID).utf8ToString())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (termExplanations.isEmpty()) {
|
||||
return Explanation.noMatch("No matching terms in the document");
|
||||
}
|
||||
|
||||
Explanation freqExplanation =
|
||||
Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
|
||||
return leafSimScorer.explain(doc, freqExplanation);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -53,6 +53,13 @@ class TermAutomatonScorer extends Scorer {
|
|||
private int docID = -1;
|
||||
private int freq;
|
||||
|
||||
/**
|
||||
* originalSubsOnDoc is an array of EnumAndScorer instances used to create this
|
||||
* TermAutomatonScorer. This field is only for explain purposes and should not be used for
|
||||
* scoring/matching.
|
||||
*/
|
||||
private final EnumAndScorer[] originalSubsOnDoc;
|
||||
|
||||
public TermAutomatonScorer(
|
||||
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
|
||||
throws IOException {
|
||||
|
@ -65,6 +72,7 @@ class TermAutomatonScorer extends Scorer {
|
|||
this.anyTermID = anyTermID;
|
||||
this.subsOnDoc = new EnumAndScorer[subs.length];
|
||||
this.positions = new PosState[4];
|
||||
this.originalSubsOnDoc = subs;
|
||||
for (int i = 0; i < this.positions.length; i++) {
|
||||
this.positions[i] = new PosState();
|
||||
}
|
||||
|
@ -345,6 +353,14 @@ class TermAutomatonScorer extends Scorer {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the sub-scorers this scorer was constructed from, unmodified.
// Exposed (package-private) solely for TermAutomatonQuery's explain();
// per the field's javadoc it must not be used for matching or scoring.
EnumAndScorer[] getOriginalSubsOnDoc() {
return originalSubsOnDoc;
}
|
||||
|
||||
// Returns the per-leaf similarity scorer backing this scorer; exposed
// (package-private) so explain() can score individual terms and produce
// the final similarity explanation.
LeafSimScorer getLeafSimScorer() {
return docScorer;
}
|
||||
|
||||
// Debug representation; delegates to the weight's own toString().
@Override
public String toString() {
return "TermAutomatonScorer(" + weight + ")";
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
|
@ -842,6 +843,97 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
|||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
/* Implement a custom term automaton query to ensure that rewritten queries
|
||||
* do not get rewritten to primitive queries. The custom extension will allow
|
||||
* the following explain tests to evaluate Explain for the query we intend to
|
||||
* test, TermAutomatonQuery.
|
||||
* */
|
||||
|
||||
private static class CustomTermAutomatonQuery extends TermAutomatonQuery {
// Same construction as the base query; only rewrite behavior differs.
public CustomTermAutomatonQuery(String field) {
super(field);
}

// Identity rewrite: keeps the query a TermAutomatonQuery so the explain
// tests exercise TermAutomatonQuery.explain() rather than the explain()
// of a simplified (e.g. term/phrase) rewritten form.
@Override
public Query rewrite(IndexSearcher searcher) throws IOException {
return this;
}
}
|
||||
|
||||
public void testExplainNoMatchingDocument() throws Exception {
|
||||
CustomTermAutomatonQuery q = new CustomTermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
q.addTransition(initState, s1, "xml");
|
||||
q.setAccept(s1, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", "protobuf", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
Query rewrittenQuery = q.rewrite(searcher);
|
||||
assertTrue(rewrittenQuery instanceof TermAutomatonQuery);
|
||||
|
||||
TopDocs topDocs = searcher.search(rewrittenQuery, 10);
|
||||
assertEquals(0, topDocs.totalHits.value);
|
||||
|
||||
Explanation explanation = searcher.explain(rewrittenQuery, 0);
|
||||
assertFalse("Explanation should indicate no match", explanation.isMatch());
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
// TODO: improve experience of working with explain
|
||||
public void testExplainMatchingDocuments() throws Exception {
|
||||
CustomTermAutomatonQuery q = new CustomTermAutomatonQuery("field");
|
||||
|
||||
int initState = q.createState();
|
||||
int s1 = q.createState();
|
||||
int s2 = q.createState();
|
||||
q.addTransition(initState, s1, "xml");
|
||||
q.addTransition(s1, s2, "json");
|
||||
q.addTransition(s1, s2, "protobuf");
|
||||
q.setAccept(s2, true);
|
||||
q.finish();
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
|
||||
Document doc1 = new Document();
|
||||
doc1.add(newTextField("field", "xml json", Field.Store.NO));
|
||||
w.addDocument(doc1);
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(newTextField("field", "xml protobuf", Field.Store.NO));
|
||||
w.addDocument(doc2);
|
||||
|
||||
Document doc3 = new Document();
|
||||
doc3.add(newTextField("field", "xml qux", Field.Store.NO));
|
||||
w.addDocument(doc3);
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
Query rewrittenQuery = q.rewrite(searcher);
|
||||
assertTrue(
|
||||
"Rewritten query should be an instance of TermAutomatonQuery",
|
||||
rewrittenQuery instanceof TermAutomatonQuery);
|
||||
TopDocs topDocs = searcher.search(q, 10);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
Explanation explanation = searcher.explain(q, scoreDoc.doc);
|
||||
assertNotNull("Explanation should not be null", explanation);
|
||||
assertTrue("Explanation should indicate a match", explanation.isMatch());
|
||||
}
|
||||
|
||||
IOUtils.close(w, r, dir);
|
||||
}
|
||||
|
||||
public void testRewritePhraseWithAny() throws Exception {
|
||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||
int initState = q.createState();
|
||||
|
|
Loading…
Reference in New Issue