mirror of https://github.com/apache/lucene.git
Explain term automaton queries (#12208)
This commit is contained in:
parent
c31017589b
commit
2d7908e3c9
|
@ -179,6 +179,8 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
|
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
|
||||||
|
|
||||||
|
* GITHUB#12178: Add explanations for TermAutomatonQuery (Marcus Eagan via Patrick Zhai, Mike McCandless, Robert Muir, Mikhail Khludnev)
|
||||||
|
|
||||||
* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
|
* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
|
||||||
|
|
||||||
* GITHUB#12212: Bug fix for a DrillSideways issue where matching hits could occasionally be missed. (Frederic Thevenet)
|
* GITHUB#12212: Bug fix for a DrillSideways issue where matching hits could occasionally be missed. (Frederic Thevenet)
|
||||||
|
|
|
@ -442,8 +442,44 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
// TODO
|
Scorer scorer = scorer(context);
|
||||||
return null;
|
if (scorer == null) {
|
||||||
|
return Explanation.noMatch("No matching terms in the document");
|
||||||
|
}
|
||||||
|
|
||||||
|
int advancedDoc = scorer.iterator().advance(doc);
|
||||||
|
if (advancedDoc != doc) {
|
||||||
|
return Explanation.noMatch("No matching terms in the document");
|
||||||
|
}
|
||||||
|
|
||||||
|
float score = scorer.score();
|
||||||
|
LeafSimScorer leafSimScorer = ((TermAutomatonScorer) scorer).getLeafSimScorer();
|
||||||
|
EnumAndScorer[] originalSubsOnDoc = ((TermAutomatonScorer) scorer).getOriginalSubsOnDoc();
|
||||||
|
|
||||||
|
List<Explanation> termExplanations = new ArrayList<>();
|
||||||
|
for (EnumAndScorer enumAndScorer : originalSubsOnDoc) {
|
||||||
|
if (enumAndScorer != null) {
|
||||||
|
PostingsEnum postingsEnum = enumAndScorer.posEnum;
|
||||||
|
if (postingsEnum.docID() == doc) {
|
||||||
|
float termScore = leafSimScorer.score(doc, postingsEnum.freq());
|
||||||
|
termExplanations.add(
|
||||||
|
Explanation.match(
|
||||||
|
postingsEnum.freq(),
|
||||||
|
"term frequency in the document",
|
||||||
|
Explanation.match(
|
||||||
|
termScore,
|
||||||
|
"score for term: " + idToTerm.get(enumAndScorer.termID).utf8ToString())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (termExplanations.isEmpty()) {
|
||||||
|
return Explanation.noMatch("No matching terms in the document");
|
||||||
|
}
|
||||||
|
|
||||||
|
Explanation freqExplanation =
|
||||||
|
Explanation.match(score, "TermAutomatonQuery, sum of:", termExplanations);
|
||||||
|
return leafSimScorer.explain(doc, freqExplanation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,13 @@ class TermAutomatonScorer extends Scorer {
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private int freq;
|
private int freq;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* originalSubsOnDoc is an array of EnumAndScorer instances used to create this
|
||||||
|
* TermAutomatonScorer. This field is only for explain purposes and should not be used for
|
||||||
|
* scoring/matching.
|
||||||
|
*/
|
||||||
|
private final EnumAndScorer[] originalSubsOnDoc;
|
||||||
|
|
||||||
public TermAutomatonScorer(
|
public TermAutomatonScorer(
|
||||||
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
|
TermAutomatonWeight weight, EnumAndScorer[] subs, int anyTermID, LeafSimScorer docScorer)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -65,6 +72,7 @@ class TermAutomatonScorer extends Scorer {
|
||||||
this.anyTermID = anyTermID;
|
this.anyTermID = anyTermID;
|
||||||
this.subsOnDoc = new EnumAndScorer[subs.length];
|
this.subsOnDoc = new EnumAndScorer[subs.length];
|
||||||
this.positions = new PosState[4];
|
this.positions = new PosState[4];
|
||||||
|
this.originalSubsOnDoc = subs;
|
||||||
for (int i = 0; i < this.positions.length; i++) {
|
for (int i = 0; i < this.positions.length; i++) {
|
||||||
this.positions[i] = new PosState();
|
this.positions[i] = new PosState();
|
||||||
}
|
}
|
||||||
|
@ -345,6 +353,14 @@ class TermAutomatonScorer extends Scorer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EnumAndScorer[] getOriginalSubsOnDoc() {
|
||||||
|
return originalSubsOnDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
LeafSimScorer getLeafSimScorer() {
|
||||||
|
return docScorer;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "TermAutomatonScorer(" + weight + ")";
|
return "TermAutomatonScorer(" + weight + ")";
|
||||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
import org.apache.lucene.search.ConstantScoreWeight;
|
import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
import org.apache.lucene.search.MultiPhraseQuery;
|
import org.apache.lucene.search.MultiPhraseQuery;
|
||||||
|
@ -842,6 +843,97 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
|
||||||
IOUtils.close(w, r, dir);
|
IOUtils.close(w, r, dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Implement a custom term automaton query to ensure that rewritten queries
|
||||||
|
* do not get rewritten to primitive queries. The custom extension will allow
|
||||||
|
* the following explain tests to evaluate Explain for the query we intend to
|
||||||
|
* test, TermAutomatonQuery.
|
||||||
|
* */
|
||||||
|
|
||||||
|
private static class CustomTermAutomatonQuery extends TermAutomatonQuery {
|
||||||
|
public CustomTermAutomatonQuery(String field) {
|
||||||
|
super(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexSearcher searcher) throws IOException {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExplainNoMatchingDocument() throws Exception {
|
||||||
|
CustomTermAutomatonQuery q = new CustomTermAutomatonQuery("field");
|
||||||
|
int initState = q.createState();
|
||||||
|
int s1 = q.createState();
|
||||||
|
q.addTransition(initState, s1, "xml");
|
||||||
|
q.setAccept(s1, true);
|
||||||
|
q.finish();
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(newTextField("field", "protobuf", Field.Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader r = w.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(r);
|
||||||
|
Query rewrittenQuery = q.rewrite(searcher);
|
||||||
|
assertTrue(rewrittenQuery instanceof TermAutomatonQuery);
|
||||||
|
|
||||||
|
TopDocs topDocs = searcher.search(rewrittenQuery, 10);
|
||||||
|
assertEquals(0, topDocs.totalHits.value);
|
||||||
|
|
||||||
|
Explanation explanation = searcher.explain(rewrittenQuery, 0);
|
||||||
|
assertFalse("Explanation should indicate no match", explanation.isMatch());
|
||||||
|
|
||||||
|
IOUtils.close(w, r, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: improve experience of working with explain
|
||||||
|
public void testExplainMatchingDocuments() throws Exception {
|
||||||
|
CustomTermAutomatonQuery q = new CustomTermAutomatonQuery("field");
|
||||||
|
|
||||||
|
int initState = q.createState();
|
||||||
|
int s1 = q.createState();
|
||||||
|
int s2 = q.createState();
|
||||||
|
q.addTransition(initState, s1, "xml");
|
||||||
|
q.addTransition(s1, s2, "json");
|
||||||
|
q.addTransition(s1, s2, "protobuf");
|
||||||
|
q.setAccept(s2, true);
|
||||||
|
q.finish();
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
|
||||||
|
Document doc1 = new Document();
|
||||||
|
doc1.add(newTextField("field", "xml json", Field.Store.NO));
|
||||||
|
w.addDocument(doc1);
|
||||||
|
|
||||||
|
Document doc2 = new Document();
|
||||||
|
doc2.add(newTextField("field", "xml protobuf", Field.Store.NO));
|
||||||
|
w.addDocument(doc2);
|
||||||
|
|
||||||
|
Document doc3 = new Document();
|
||||||
|
doc3.add(newTextField("field", "xml qux", Field.Store.NO));
|
||||||
|
w.addDocument(doc3);
|
||||||
|
|
||||||
|
IndexReader r = w.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(r);
|
||||||
|
Query rewrittenQuery = q.rewrite(searcher);
|
||||||
|
assertTrue(
|
||||||
|
"Rewritten query should be an instance of TermAutomatonQuery",
|
||||||
|
rewrittenQuery instanceof TermAutomatonQuery);
|
||||||
|
TopDocs topDocs = searcher.search(q, 10);
|
||||||
|
assertEquals(2, topDocs.totalHits.value);
|
||||||
|
|
||||||
|
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||||
|
Explanation explanation = searcher.explain(q, scoreDoc.doc);
|
||||||
|
assertNotNull("Explanation should not be null", explanation);
|
||||||
|
assertTrue("Explanation should indicate a match", explanation.isMatch());
|
||||||
|
}
|
||||||
|
|
||||||
|
IOUtils.close(w, r, dir);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRewritePhraseWithAny() throws Exception {
|
public void testRewritePhraseWithAny() throws Exception {
|
||||||
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
TermAutomatonQuery q = new TermAutomatonQuery("field");
|
||||||
int initState = q.createState();
|
int initState = q.createState();
|
||||||
|
|
Loading…
Reference in New Issue