From afb517e8320ebb3d0e6b82d32bc94b6c020a7661 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 29 Jul 2009 16:47:05 +0000 Subject: [PATCH] LUCENE-1752: Missing highlights when terms were repeated in separate, nested, boolean or disjunction queries. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@798976 13f79535-47bb-0310-9956-ffa450edef68 --- contrib/CHANGES.txt | 3 +++ .../highlight/WeightedSpanTermExtractor.java | 9 +++---- .../search/highlight/HighlighterTest.java | 27 ++++++++++++++++++- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/contrib/CHANGES.txt b/contrib/CHANGES.txt index dbd12106904..4546df08a3f 100644 --- a/contrib/CHANGES.txt +++ b/contrib/CHANGES.txt @@ -41,6 +41,9 @@ Bug fixes 8. LUCENE-1491: EdgeNGramTokenFilter no longer stops on tokens shorter than minimum n-gram size. (Todd Teak via Otis Gospodnetic) + + 9. LUCENE-1752: Missing highlights when terms were repeated in separate, nested, boolean or + disjunction queries. (Koji Sekiguchi, Mark Miller) New features diff --git a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 586fbd7177f..241a85a5448 100644 --- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -98,13 +98,12 @@ public class WeightedSpanTermExtractor { private void extract(Query query, Map terms) throws IOException { if (query instanceof BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); - Map booleanTerms = new PositionCheckingMap(); + for (int i = 0; i < queryClauses.length; i++) { if (!queryClauses[i].isProhibited()) { - extract(queryClauses[i].getQuery(), booleanTerms); + extract(queryClauses[i].getQuery(), terms); } } - terms.putAll(booleanTerms); } else if (query instanceof PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; @@ -129,11 +128,9 @@ public class WeightedSpanTermExtractor { } else if (query instanceof FilteredQuery) { extract(((FilteredQuery) query).getQuery(), terms); } else if (query instanceof DisjunctionMaxQuery) { - Map disjunctTerms = new PositionCheckingMap(); for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { - extract((Query) iterator.next(), disjunctTerms); + extract((Query) iterator.next(), terms); } - terms.putAll(disjunctTerms); } else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) { MultiTermQuery mtq = ((MultiTermQuery)query); if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { diff --git a/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index a8a529f2daa..a79b02ccac6 100644 --- a/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -53,13 +53,13 @@ import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiSearcher; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeFilter; @@ -178,6 +178,31 @@ public class HighlighterTest extends TestCase implements Formatter { // throw any exceptions } + // LUCENE-1752 + public void testRepeatingTermsInMultBooleans() throws Exception { + String content = "x y z a b c d e f g b c g"; + String ph1 = "\"a b c d\""; + String ph2 = "\"b c g\""; + String f1 = "f1"; + String f2 = "f2"; + String f1c = f1 + ":"; + String f2c = f2 + ":"; + String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2 + + " OR " + f2c + ph2 + ")"; + Analyzer analyzer = new WhitespaceAnalyzer(); + QueryParser qp = new QueryParser(f1, analyzer); + Query query = qp.parse(q); + CachingTokenFilter stream = new CachingTokenFilter(analyzer.tokenStream(f1, + new StringReader(content))); + Scorer scorer = new SpanScorer(query, f1, stream, false); + Highlighter h = new Highlighter(this, scorer); + + h.getBestFragment(analyzer, f1, content); + + assertTrue("Failed to find correct number of highlights " + numHighlights + " found", + numHighlights == 7); + } + public void testSimpleSpanPhraseHighlighting() throws Exception { doSearching("\"very long and contains\"");