From 075778c06e28aef5fecadaaf3d5257ed813cecb4 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 30 May 2014 21:59:21 +0000 Subject: [PATCH] LUCENE-5717: Postings highlighter support for multi term queries within filtered and constant score queries git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1598755 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../MultiTermHighlighting.java | 6 ++ .../TestMultiTermHighlighting.java | 87 +++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e18d3c923a2..2825f91b2d6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -122,6 +122,10 @@ New Features * LUCENE-5680: Add ability to atomically update a set of DocValues fields. (Shai Erera) +* LUCENE-5717: Add support for multiterm queries nested inside + filtered and constant-score queries to postings highlighter. + (Luca Cavanna via Robert Muir) + Changes in Backwards Compatibility Policy * LUCENE-5634: Add reuse argument to IndexableField.tokenStream. This diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java index c5e14f572ce..a06102578b1 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java @@ -31,7 +31,9 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; @@ -68,6 +70,10 @@ class MultiTermHighlighting { list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field))); } } + } else if (query instanceof FilteredQuery) { + list.addAll(Arrays.asList(extractAutomata(((FilteredQuery) query).getQuery(), field))); + } else if (query instanceof ConstantScoreQuery) { + list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field))); } else if (query instanceof DisjunctionMaxQuery) { for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) { list.addAll(Arrays.asList(extractAutomata(sub, field))); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java index 3aa7dd47158..11c9457595b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java @@ -29,9 +29,12 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.queries.TermFilter; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -437,6 +440,90 @@ public class TestMultiTermHighlighting extends LuceneTestCase { ir.close(); dir.close(); } + + public void testWildcardInFiltered() throws Exception { + Directory dir = newDirectory(); + // use simpleanalyzer for more natural tokenization (else "test." is a token) + final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + FieldType offsetsType = new FieldType(TextField.TYPE_STORED); + offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Field body = new Field("body", "", offsetsType); + Document doc = new Document(); + doc.add(body); + + body.setStringValue("This is a test."); + iw.addDocument(doc); + body.setStringValue("Test a one sentence document."); + iw.addDocument(doc); + + IndexReader ir = iw.getReader(); + iw.shutdown(); + + IndexSearcher searcher = newSearcher(ir); + PostingsHighlighter highlighter = new PostingsHighlighter() { + @Override + protected Analyzer getIndexAnalyzer(String field) { + return analyzer; + } + }; + FilteredQuery query = new FilteredQuery( + new WildcardQuery(new Term("body", "te*")), + new TermFilter(new Term("body", "test"))); + TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER); + assertEquals(2, topDocs.totalHits); + String snippets[] = highlighter.highlight("body", query, searcher, topDocs); + assertEquals(2, snippets.length); + assertEquals("This is a test.", snippets[0]); + assertEquals("Test a one sentence document.", snippets[1]); + + ir.close(); + dir.close(); + } + + public void testWildcardInConstantScore() throws Exception { + Directory dir = newDirectory(); + // use simpleanalyzer for more natural tokenization (else "test." is a token) + final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + FieldType offsetsType = new FieldType(TextField.TYPE_STORED); + offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Field body = new Field("body", "", offsetsType); + Document doc = new Document(); + doc.add(body); + + body.setStringValue("This is a test."); + iw.addDocument(doc); + body.setStringValue("Test a one sentence document."); + iw.addDocument(doc); + + IndexReader ir = iw.getReader(); + iw.shutdown(); + + IndexSearcher searcher = newSearcher(ir); + PostingsHighlighter highlighter = new PostingsHighlighter() { + @Override + protected Analyzer getIndexAnalyzer(String field) { + return analyzer; + } + }; + ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*"))); + TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER); + assertEquals(2, topDocs.totalHits); + String snippets[] = highlighter.highlight("body", query, searcher, topDocs); + assertEquals(2, snippets.length); + assertEquals("This is a test.", snippets[0]); + assertEquals("Test a one sentence document.", snippets[1]); + + ir.close(); + dir.close(); + } public void testWildcardInDisjunctionMax() throws Exception { Directory dir = newDirectory();