LUCENE-5717: Postings highlighter support for multi-term queries within filtered and constant-score queries

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1598755 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-05-30 21:59:21 +00:00
parent 98a527731d
commit 075778c06e
3 changed files with 97 additions and 0 deletions

lucene/CHANGES.txt

@@ -122,6 +122,10 @@ New Features
* LUCENE-5680: Add ability to atomically update a set of DocValues
  fields. (Shai Erera)

* LUCENE-5717: Add support for multiterm queries nested inside
  filtered and constant-score queries to postings highlighter.
  (Luca Cavanna via Robert Muir)

Changes in Backwards Compatibility Policy

* LUCENE-5634: Add reuse argument to IndexableField.tokenStream. This
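
To make the new entry concrete, here is a minimal sketch of the two query shapes it covers (Lucene 4.x API; the "body" field, terms, and setup mirror the tests added in this commit):

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;

// A multi-term (wildcard) query nested inside a FilteredQuery...
Query filtered = new FilteredQuery(
    new WildcardQuery(new Term("body", "te*")),
    new TermFilter(new Term("body", "test")));

// ...or inside a ConstantScoreQuery is now unwrapped by the postings
// highlighter, so matches of "te*" get highlighted instead of being missed.
Query constantScore = new ConstantScoreQuery(
    new WildcardQuery(new Term("body", "te*")));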

lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java

@@ -31,7 +31,9 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -68,6 +70,10 @@ class MultiTermHighlighting {
          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
        }
      }
    } else if (query instanceof FilteredQuery) {
      list.addAll(Arrays.asList(extractAutomata(((FilteredQuery) query).getQuery(), field)));
    } else if (query instanceof ConstantScoreQuery) {
      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field)));
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
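
Both new branches simply recurse into the wrapped query; a standalone sketch of the idea (unwrap is a hypothetical helper written for illustration, not part of this commit):

// Peel filtering/scoring wrappers until the innermost query is reached;
// extractAutomata then checks whether that query is an AutomatonQuery,
// FuzzyQuery, etc., and builds automata for highlighting from it.
static Query unwrap(Query query) {
  if (query instanceof FilteredQuery) {
    // the filter only restricts matches, so highlight the wrapped query
    return unwrap(((FilteredQuery) query).getQuery());
  } else if (query instanceof ConstantScoreQuery
      && ((ConstantScoreQuery) query).getQuery() != null) {
    // getQuery() is null when a ConstantScoreQuery wraps a raw Filter;
    // in that case there is nothing to extract
    return unwrap(((ConstantScoreQuery) query).getQuery());
  }
  return query;
}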

lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java

@@ -29,9 +29,12 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -437,6 +440,90 @@ public class TestMultiTermHighlighting extends LuceneTestCase {
    ir.close();
    dir.close();
  }

  public void testWildcardInFiltered() throws Exception {
    Directory dir = newDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.shutdown();

    IndexSearcher searcher = newSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter() {
      @Override
      protected Analyzer getIndexAnalyzer(String field) {
        return analyzer;
      }
    };
    FilteredQuery query = new FilteredQuery(
        new WildcardQuery(new Term("body", "te*")),
        new TermFilter(new Term("body", "test")));
    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.close();
    dir.close();
  }
  public void testWildcardInConstantScore() throws Exception {
    Directory dir = newDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.shutdown();

    IndexSearcher searcher = newSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter() {
      @Override
      protected Analyzer getIndexAnalyzer(String field) {
        return analyzer;
      }
    };
    ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.close();
    dir.close();
  }

  public void testWildcardInDisjunctionMax() throws Exception {
    Directory dir = newDirectory();