LUCENE-1285: WeightedSpanTermExtractor incorrectly treats the same terms occurring in different query types

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@659965 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2008-05-25 11:38:55 +00:00
parent 33aea48b02
commit f32b5a5698
2 changed files with 484 additions and 433 deletions

View File

@@ -1,433 +1,460 @@
package org.apache.lucene.search.highlight; package org.apache.lucene.search.highlight;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans; import org.apache.lucene.search.spans.Spans;
/** /**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether Terms from the query are contained in a supplied TokenStream. * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether Terms from the query are contained in a supplied TokenStream.
*/ */
public class WeightedSpanTermExtractor { public class WeightedSpanTermExtractor {
private String fieldName; private String fieldName;
private CachingTokenFilter cachedTokenFilter; private CachingTokenFilter cachedTokenFilter;
private Map readers = new HashMap(10); // Map<String, IndexReader> private Map readers = new HashMap(10); // Map<String, IndexReader>
private String defaultField; private String defaultField;
private boolean highlightCnstScrRngQuery; private boolean highlightCnstScrRngQuery;
public WeightedSpanTermExtractor() { public WeightedSpanTermExtractor() {
} }
public WeightedSpanTermExtractor(String defaultField) { public WeightedSpanTermExtractor(String defaultField) {
if (defaultField != null) { if (defaultField != null) {
this.defaultField = defaultField.intern(); this.defaultField = defaultField.intern();
} }
} }
private void closeReaders() { private void closeReaders() {
Collection readerSet = readers.values(); Collection readerSet = readers.values();
Iterator it = readerSet.iterator(); Iterator it = readerSet.iterator();
while (it.hasNext()) { while (it.hasNext()) {
IndexReader reader = (IndexReader) it.next(); IndexReader reader = (IndexReader) it.next();
try { try {
reader.close(); reader.close();
} catch (IOException e) { } catch (IOException e) {
// alert? // alert?
} }
} }
} }
/** /**
* Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>. * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
* *
* @param query * @param query
* Query to extract Terms from * Query to extract Terms from
* @param terms * @param terms
* Map to place created WeightedSpanTerms in * Map to place created WeightedSpanTerms in
* @throws IOException * @throws IOException
*/ */
private void extract(Query query, Map terms) throws IOException { private void extract(Query query, Map terms) throws IOException {
if (query instanceof BooleanQuery) { if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
Map booleanTerms = new HashMap(); Map booleanTerms = new PositionCheckingMap();
for (int i = 0; i < queryClauses.length; i++) { for (int i = 0; i < queryClauses.length; i++) {
if (!queryClauses[i].isProhibited()) { if (!queryClauses[i].isProhibited()) {
extract(queryClauses[i].getQuery(), booleanTerms); extract(queryClauses[i].getQuery(), booleanTerms);
} }
} }
terms.putAll(booleanTerms); terms.putAll(booleanTerms);
} else if (query instanceof PhraseQuery) { } else if (query instanceof PhraseQuery) {
Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms(); Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) { for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
} }
int slop = ((PhraseQuery) query).getSlop(); int slop = ((PhraseQuery) query).getSlop();
boolean inorder = false; boolean inorder = false;
if (slop == 0) { if (slop == 0) {
inorder = true; inorder = true;
} }
SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
sp.setBoost(query.getBoost()); sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp); extractWeightedSpanTerms(terms, sp);
} else if (query instanceof TermQuery) { } else if (query instanceof TermQuery) {
extractWeightedTerms(terms, query); extractWeightedTerms(terms, query);
} else if (query instanceof SpanQuery) { } else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query); extractWeightedSpanTerms(terms, (SpanQuery) query);
} else if (query instanceof FilteredQuery) { } else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms); extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) { } else if (query instanceof DisjunctionMaxQuery) {
Map disjunctTerms = new HashMap(); Map disjunctTerms = new PositionCheckingMap();
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract((Query) iterator.next(), disjunctTerms); extract((Query) iterator.next(), disjunctTerms);
} }
terms.putAll(disjunctTerms); terms.putAll(disjunctTerms);
} else if (query instanceof MultiPhraseQuery) { } else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List termArrays = mpq.getTermArrays(); final List termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions(); final int[] positions = mpq.getPositions();
if (positions.length > 0) { if (positions.length > 0) {
int maxPosition = positions[positions.length - 1]; int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) { for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) { if (positions[i] > maxPosition) {
maxPosition = positions[i]; maxPosition = positions[i];
} }
} }
final List[] disjunctLists = new List[maxPosition + 1]; final List[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0; int distinctPositions = 0;
for (int i = 0; i < termArrays.size(); ++i) { for (int i = 0; i < termArrays.size(); ++i) {
final Term[] termArray = (Term[]) termArrays.get(i); final Term[] termArray = (Term[]) termArrays.get(i);
List disjuncts = disjunctLists[positions[i]]; List disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) { if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList(termArray.length)); disjuncts = (disjunctLists[positions[i]] = new ArrayList(termArray.length));
++distinctPositions; ++distinctPositions;
} }
for (int j = 0; j < termArray.length; ++j) { for (int j = 0; j < termArray.length; ++j) {
disjuncts.add(new SpanTermQuery(termArray[j])); disjuncts.add(new SpanTermQuery(termArray[j]));
} }
} }
int positionGaps = 0; int positionGaps = 0;
int position = 0; int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions]; final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) { for (int i = 0; i < disjunctLists.length; ++i) {
List disjuncts = disjunctLists[i]; List disjuncts = disjunctLists[i];
if (disjuncts != null) { if (disjuncts != null) {
clauses[position++] = new SpanOrQuery((SpanQuery[]) disjuncts clauses[position++] = new SpanOrQuery((SpanQuery[]) disjuncts
.toArray(new SpanQuery[disjuncts.size()])); .toArray(new SpanQuery[disjuncts.size()]));
} else { } else {
++positionGaps; ++positionGaps;
} }
} }
final int slop = mpq.getSlop(); final int slop = mpq.getSlop();
final boolean inorder = (slop == 0); final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
sp.setBoost(query.getBoost()); sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp); extractWeightedSpanTerms(terms, sp);
} }
} else if (query instanceof ConstantScoreRangeQuery) { } else if (query instanceof ConstantScoreRangeQuery) {
ConstantScoreRangeQuery q = (ConstantScoreRangeQuery) query; ConstantScoreRangeQuery q = (ConstantScoreRangeQuery) query;
Term lower = new Term(fieldName, q.getLowerVal()); Term lower = new Term(fieldName, q.getLowerVal());
Term upper = new Term(fieldName, q.getUpperVal()); Term upper = new Term(fieldName, q.getUpperVal());
FilterIndexReader fir = new FilterIndexReader(getReaderForField(fieldName)); FilterIndexReader fir = new FilterIndexReader(getReaderForField(fieldName));
try { try {
TermEnum te = fir.terms(lower); TermEnum te = fir.terms(lower);
BooleanQuery bq = new BooleanQuery(); BooleanQuery bq = new BooleanQuery();
do { do {
Term term = te.term(); Term term = te.term();
if (term != null && upper.compareTo(term) >= 0) { if (term != null && upper.compareTo(term) >= 0) {
bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD)); bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
} else { } else {
break; break;
} }
} while (te.next()); } while (te.next());
extract(bq, terms); extract(bq, terms);
} finally { } finally {
fir.close(); fir.close();
} }
} }
} }
/** /**
* Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>SpanQuery</code>. * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>SpanQuery</code>.
* *
* @param terms * @param terms
* Map to place created WeightedSpanTerms in * Map to place created WeightedSpanTerms in
* @param spanQuery * @param spanQuery
* SpanQuery to extract Terms from * SpanQuery to extract Terms from
* @throws IOException * @throws IOException
*/ */
private void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException { private void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException {
Set nonWeightedTerms = new HashSet(); Set nonWeightedTerms = new HashSet();
spanQuery.extractTerms(nonWeightedTerms); spanQuery.extractTerms(nonWeightedTerms);
Set fieldNames; Set fieldNames;
if (fieldName == null) { if (fieldName == null) {
fieldNames = new HashSet(); fieldNames = new HashSet();
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) { for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
Term queryTerm = (Term) iter.next(); Term queryTerm = (Term) iter.next();
fieldNames.add(queryTerm.field()); fieldNames.add(queryTerm.field());
} }
} else { } else {
fieldNames = new HashSet(1); fieldNames = new HashSet(1);
fieldNames.add(fieldName); fieldNames.add(fieldName);
} }
// To support the use of the default field name // To support the use of the default field name
if (defaultField != null) { if (defaultField != null) {
fieldNames.add(defaultField); fieldNames.add(defaultField);
} }
Iterator it = fieldNames.iterator(); Iterator it = fieldNames.iterator();
List spanPositions = new ArrayList(); List spanPositions = new ArrayList();
while (it.hasNext()) { while (it.hasNext()) {
String field = (String) it.next(); String field = (String) it.next();
IndexReader reader = getReaderForField(field); IndexReader reader = getReaderForField(field);
Spans spans = spanQuery.getSpans(reader); Spans spans = spanQuery.getSpans(reader);
// collect span positions // collect span positions
while (spans.next()) { while (spans.next()) {
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1)); spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
} }
cachedTokenFilter.reset(); cachedTokenFilter.reset();
} }
if (spanPositions.size() == 0) { if (spanPositions.size() == 0) {
// no spans found // no spans found
return; return;
} }
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) { for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
Term queryTerm = (Term) iter.next(); Term queryTerm = (Term) iter.next();
if (fieldNameComparator(queryTerm.field())) { if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(queryTerm.text()); WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(queryTerm.text());
if (weightedSpanTerm == null) { if (weightedSpanTerm == null) {
weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text()); weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
weightedSpanTerm.addPositionSpans(spanPositions); weightedSpanTerm.addPositionSpans(spanPositions);
weightedSpanTerm.positionSensitive = true; weightedSpanTerm.positionSensitive = true;
terms.put(queryTerm.text(), weightedSpanTerm); terms.put(queryTerm.text(), weightedSpanTerm);
} else { } else {
if (spanPositions.size() > 0) { if (spanPositions.size() > 0) {
weightedSpanTerm.addPositionSpans(spanPositions); weightedSpanTerm.addPositionSpans(spanPositions);
weightedSpanTerm.positionSensitive = true; }
} }
} }
} }
} }
}
/**
/** * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
* Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>. *
* * @param terms
* @param terms * Map to place created WeightedSpanTerms in
* Map to place created WeightedSpanTerms in * @param query
* @param query * Query to extract Terms from
* Query to extract Terms from * @throws IOException
* @throws IOException */
*/ private void extractWeightedTerms(Map terms, Query query) throws IOException {
private void extractWeightedTerms(Map terms, Query query) throws IOException { Set nonWeightedTerms = new HashSet();
Set nonWeightedTerms = new HashSet(); query.extractTerms(nonWeightedTerms);
query.extractTerms(nonWeightedTerms);
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) { Term queryTerm = (Term) iter.next();
Term queryTerm = (Term) iter.next();
if (fieldNameComparator(queryTerm.field())) {
if (fieldNameComparator(queryTerm.field())) { WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text());
WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text()); terms.put(queryTerm.text(), weightedSpanTerm);
terms.put(queryTerm.text(), weightedSpanTerm); }
} }
} }
}
/**
/** * Necessary to implement matches for queries against <code>defaultField</code>
* Necessary to implement matches for queries against <code>defaultField</code> */
*/ private boolean fieldNameComparator(String fieldNameToCheck) {
private boolean fieldNameComparator(String fieldNameToCheck) { boolean rv = fieldName == null || fieldNameToCheck == fieldName
boolean rv = fieldName == null || fieldNameToCheck == fieldName || fieldNameToCheck == defaultField;
|| fieldNameToCheck == defaultField; return rv;
return rv; }
}
private IndexReader getReaderForField(String field) {
private IndexReader getReaderForField(String field) { IndexReader reader = (IndexReader) readers.get(field);
IndexReader reader = (IndexReader) readers.get(field); if (reader == null) {
if (reader == null) { MemoryIndex indexer = new MemoryIndex();
MemoryIndex indexer = new MemoryIndex(); indexer.addField(field, cachedTokenFilter);
indexer.addField(field, cachedTokenFilter); IndexSearcher searcher = indexer.createSearcher();
IndexSearcher searcher = indexer.createSearcher(); reader = searcher.getIndexReader();
reader = searcher.getIndexReader(); readers.put(field, reader);
readers.put(field, reader); }
} return reader;
return reader; }
}
/**
/** * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>.
* Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>. *
* * <p>
* <p> *
* * @param query
* @param query * that caused hit
* that caused hit * @param tokenStream
* @param tokenStream * of text to be highlighted
* of text to be highlighted * @return
* @return * @throws IOException
* @throws IOException */
*/ public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter)
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter) throws IOException {
throws IOException { this.fieldName = null;
this.fieldName = null; this.cachedTokenFilter = cachingTokenFilter;
this.cachedTokenFilter = cachingTokenFilter;
Map terms = new PositionCheckingMap();
Map terms = new HashMap(); try {
try { extract(query, terms);
extract(query, terms); } finally {
} finally { closeReaders();
closeReaders(); }
}
return terms;
return terms; }
}
/**
/** * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>.
* Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>. *
* * <p>
* <p> *
* * @param query
* @param query * that caused hit
* that caused hit * @param tokenStream
* @param tokenStream * of text to be highlighted
* of text to be highlighted * @param fieldName
* @param fieldName * restricts Term's used based on field name
* restricts Term's used based on field name * @return
* @return * @throws IOException
* @throws IOException */
*/ public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter,
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter, String fieldName) throws IOException {
String fieldName) throws IOException { if (fieldName != null) {
if (fieldName != null) { this.fieldName = fieldName.intern();
this.fieldName = fieldName.intern(); }
}
Map terms = new PositionCheckingMap();
Map terms = new HashMap(); this.cachedTokenFilter = cachingTokenFilter;
this.cachedTokenFilter = cachingTokenFilter; try {
try { extract(query, terms);
extract(query, terms); } finally {
} finally { closeReaders();
closeReaders(); }
}
return terms;
return terms; }
}
/**
/** * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>. Uses a supplied
* Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>. Uses a supplied * <code>IndexReader</code> to properly weight terms (for gradient highlighting).
* <code>IndexReader</code> to properly weight terms (for gradient highlighting). *
* * <p>
* <p> *
* * @param query
* @param query * that caused hit
* that caused hit * @param tokenStream
* @param tokenStream * of text to be highlighted
* of text to be highlighted * @param fieldName
* @param fieldName * restricts Term's used based on field name
* restricts Term's used based on field name * @param reader
* @param reader * to use for scoring
* to use for scoring * @return
* @return * @throws IOException
* @throws IOException */
*/ public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, IndexReader reader) throws IOException {
IndexReader reader) throws IOException { this.fieldName = fieldName;
this.fieldName = fieldName; this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
Map terms = new PositionCheckingMap();
Map terms = new HashMap(); extract(query, terms);
extract(query, terms);
int totalNumDocs = reader.numDocs();
int totalNumDocs = reader.numDocs(); Set weightedTerms = terms.keySet();
Set weightedTerms = terms.keySet(); Iterator it = weightedTerms.iterator();
Iterator it = weightedTerms.iterator();
try {
try { while (it.hasNext()) {
while (it.hasNext()) { WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(it.next());
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(it.next()); int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
// IDF algorithm taken from DefaultSimilarity class
// IDF algorithm taken from DefaultSimilarity class float idf = (float) (Math.log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
float idf = (float) (Math.log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0); weightedSpanTerm.weight *= idf;
weightedSpanTerm.weight *= idf; }
} } finally {
} finally {
closeReaders();
closeReaders(); }
}
return terms;
return terms; }
}
public boolean isHighlightCnstScrRngQuery() {
public boolean isHighlightCnstScrRngQuery() { return highlightCnstScrRngQuery;
return highlightCnstScrRngQuery; }
}
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) { this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery; }
}
} /**
* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.
*/
private class PositionCheckingMap extends HashMap {
public void putAll(Map m) {
Iterator it = m.keySet().iterator();
while (it.hasNext()) {
Object key = it.next();
Object val = m.get(key);
this.put(key, val);
}
}
public Object put(Object key, Object value) {
Object prev = super.put(key, value);
if (prev == null) return prev;
WeightedSpanTerm prevTerm = (WeightedSpanTerm)prev;
WeightedSpanTerm newTerm = (WeightedSpanTerm)value;
if (!prevTerm.positionSensitive) {
newTerm.positionSensitive = false;
}
return prev;
}
}
}

View File

@@ -235,6 +235,30 @@ public class HighlighterTest extends TestCase implements Formatter {
numHighlights == 3); numHighlights == 3);
} }
} }
// position sensitive query added after position insensitive query
public void testPosTermStdTerm() throws Exception {
  // standalone term "y" plus the phrase "x y z" over the same field
  doSearching("y \"x y z\"");

  final int maxFragments = 2;

  for (int docIdx = 0; docIdx < hits.length(); docIdx++) {
    String docText = hits.doc(docIdx).get(FIELD_NAME);
    CachingTokenFilter cachedStream = new CachingTokenFilter(
        analyzer.tokenStream(FIELD_NAME, new StringReader(docText)));

    Highlighter highlighter = new Highlighter(this,
        new SpanScorer(query, FIELD_NAME, cachedStream));
    highlighter.setTextFragmenter(new SimpleFragmenter(40));

    cachedStream.reset();
    String fragments = highlighter.getBestFragments(cachedStream, docText, maxFragments,
        "...");
    System.out.println("\t" + fragments);

    // expect 4 highlights: "y" alone plus each of x, y, z from the phrase
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
        numHighlights == 4);
  }
}
public void testSpanMultiPhraseQueryHighlighting() throws Exception { public void testSpanMultiPhraseQueryHighlighting() throws Exception {
MultiPhraseQuery mpq = new MultiPhraseQuery(); MultiPhraseQuery mpq = new MultiPhraseQuery();