mirror of https://github.com/apache/lucene.git
LUCENE-1285: WeightedSpanTermExtractor incorrectly treats the same terms occurring in different query types
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659965 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
33aea48b02
commit
f32b5a5698
|
@ -95,7 +95,7 @@ public class WeightedSpanTermExtractor {
|
|||
private void extract(Query query, Map terms) throws IOException {
|
||||
if (query instanceof BooleanQuery) {
|
||||
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
|
||||
Map booleanTerms = new HashMap();
|
||||
Map booleanTerms = new PositionCheckingMap();
|
||||
for (int i = 0; i < queryClauses.length; i++) {
|
||||
if (!queryClauses[i].isProhibited()) {
|
||||
extract(queryClauses[i].getQuery(), booleanTerms);
|
||||
|
@ -126,7 +126,7 @@ public class WeightedSpanTermExtractor {
|
|||
} else if (query instanceof FilteredQuery) {
|
||||
extract(((FilteredQuery) query).getQuery(), terms);
|
||||
} else if (query instanceof DisjunctionMaxQuery) {
|
||||
Map disjunctTerms = new HashMap();
|
||||
Map disjunctTerms = new PositionCheckingMap();
|
||||
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
|
||||
extract((Query) iterator.next(), disjunctTerms);
|
||||
}
|
||||
|
@ -268,7 +268,6 @@ public class WeightedSpanTermExtractor {
|
|||
} else {
|
||||
if (spanPositions.size() > 0) {
|
||||
weightedSpanTerm.addPositionSpans(spanPositions);
|
||||
weightedSpanTerm.positionSensitive = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -336,7 +335,7 @@ public class WeightedSpanTermExtractor {
|
|||
this.fieldName = null;
|
||||
this.cachedTokenFilter = cachingTokenFilter;
|
||||
|
||||
Map terms = new HashMap();
|
||||
Map terms = new PositionCheckingMap();
|
||||
try {
|
||||
extract(query, terms);
|
||||
} finally {
|
||||
|
@ -366,7 +365,7 @@ public class WeightedSpanTermExtractor {
|
|||
this.fieldName = fieldName.intern();
|
||||
}
|
||||
|
||||
Map terms = new HashMap();
|
||||
Map terms = new PositionCheckingMap();
|
||||
this.cachedTokenFilter = cachingTokenFilter;
|
||||
try {
|
||||
extract(query, terms);
|
||||
|
@ -399,7 +398,7 @@ public class WeightedSpanTermExtractor {
|
|||
this.fieldName = fieldName;
|
||||
this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
|
||||
|
||||
Map terms = new HashMap();
|
||||
Map terms = new PositionCheckingMap();
|
||||
extract(query, terms);
|
||||
|
||||
int totalNumDocs = reader.numDocs();
|
||||
|
@ -430,4 +429,32 @@ public class WeightedSpanTermExtractor {
|
|||
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
|
||||
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
* This class makes sure that if both position sensitive and insensitive
|
||||
* versions of the same term are added, the position insensitive one wins.
|
||||
*/
|
||||
private class PositionCheckingMap extends HashMap {
|
||||
|
||||
public void putAll(Map m) {
|
||||
Iterator it = m.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Object key = it.next();
|
||||
Object val = m.get(key);
|
||||
this.put(key, val);
|
||||
}
|
||||
}
|
||||
|
||||
public Object put(Object key, Object value) {
|
||||
Object prev = super.put(key, value);
|
||||
if (prev == null) return prev;
|
||||
WeightedSpanTerm prevTerm = (WeightedSpanTerm)prev;
|
||||
WeightedSpanTerm newTerm = (WeightedSpanTerm)value;
|
||||
if (!prevTerm.positionSensitive) {
|
||||
newTerm.positionSensitive = false;
|
||||
}
|
||||
return prev;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -236,6 +236,30 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
}
|
||||
}
|
||||
|
||||
// position sensitive query added after position insensitive query
|
||||
public void testPosTermStdTerm() throws Exception {
|
||||
doSearching("y \"x y z\"");
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
System.out.println("\t" + result);
|
||||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 4);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSpanMultiPhraseQueryHighlighting() throws Exception {
|
||||
MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
|
||||
|
|
Loading…
Reference in New Issue