LUCENE-1285: WeightedSpanTermExtractor incorrectly treats the same terms occurring in different query types

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659965 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2008-05-25 11:38:55 +00:00
parent 33aea48b02
commit f32b5a5698
2 changed files with 484 additions and 433 deletions

View File

@ -95,7 +95,7 @@ public class WeightedSpanTermExtractor {
private void extract(Query query, Map terms) throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
Map booleanTerms = new HashMap();
Map booleanTerms = new PositionCheckingMap();
for (int i = 0; i < queryClauses.length; i++) {
if (!queryClauses[i].isProhibited()) {
extract(queryClauses[i].getQuery(), booleanTerms);
@ -126,7 +126,7 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) {
Map disjunctTerms = new HashMap();
Map disjunctTerms = new PositionCheckingMap();
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract((Query) iterator.next(), disjunctTerms);
}
@ -268,7 +268,6 @@ public class WeightedSpanTermExtractor {
} else {
if (spanPositions.size() > 0) {
weightedSpanTerm.addPositionSpans(spanPositions);
weightedSpanTerm.positionSensitive = true;
}
}
}
@ -336,7 +335,7 @@ public class WeightedSpanTermExtractor {
this.fieldName = null;
this.cachedTokenFilter = cachingTokenFilter;
Map terms = new HashMap();
Map terms = new PositionCheckingMap();
try {
extract(query, terms);
} finally {
@ -366,7 +365,7 @@ public class WeightedSpanTermExtractor {
this.fieldName = fieldName.intern();
}
Map terms = new HashMap();
Map terms = new PositionCheckingMap();
this.cachedTokenFilter = cachingTokenFilter;
try {
extract(query, terms);
@ -399,7 +398,7 @@ public class WeightedSpanTermExtractor {
this.fieldName = fieldName;
this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
Map terms = new HashMap();
Map terms = new PositionCheckingMap();
extract(query, terms);
int totalNumDocs = reader.numDocs();
@ -430,4 +429,32 @@ public class WeightedSpanTermExtractor {
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
}
/**
* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.
*/
private class PositionCheckingMap extends HashMap {
public void putAll(Map m) {
Iterator it = m.keySet().iterator();
while (it.hasNext()) {
Object key = it.next();
Object val = m.get(key);
this.put(key, val);
}
}
public Object put(Object key, Object value) {
Object prev = super.put(key, value);
if (prev == null) return prev;
WeightedSpanTerm prevTerm = (WeightedSpanTerm)prev;
WeightedSpanTerm newTerm = (WeightedSpanTerm)value;
if (!prevTerm.positionSensitive) {
newTerm.positionSensitive = false;
}
return prev;
}
}
}

View File

@ -236,6 +236,30 @@ public class HighlighterTest extends TestCase implements Formatter {
}
}
/**
 * Position sensitive query added after position insensitive query: searches
 * for the plain term y followed by the phrase "x y z", highlights every hit
 * with a SpanScorer, and asserts that numHighlights is 4 after each hit.
 */
public void testPosTermStdTerm() throws Exception {
  doSearching("y \"x y z\"");
  final int fragmentLimit = 2;

  int hitIndex = 0;
  while (hitIndex < hits.length()) {
    String docText = hits.doc(hitIndex).get(FIELD_NAME);

    // The same cached stream feeds the scorer and the highlighter.
    CachingTokenFilter cachedTokens = new CachingTokenFilter(
        analyzer.tokenStream(FIELD_NAME, new StringReader(docText)));
    SpanScorer spanScorer = new SpanScorer(query, FIELD_NAME, cachedTokens);
    Highlighter highlighter = new Highlighter(this, spanScorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));

    // Rewind before the highlighter reads the stream.
    cachedTokens.reset();

    String fragments = highlighter.getBestFragments(cachedTokens, docText, fragmentLimit, "...");
    System.out.println("\t" + fragments);

    String failureMessage =
        "Failed to find correct number of highlights " + numHighlights + " found";
    assertTrue(failureMessage, numHighlights == 4);

    hitIndex++;
  }
}
public void testSpanMultiPhraseQueryHighlighting() throws Exception {
MultiPhraseQuery mpq = new MultiPhraseQuery();