LUCENE-1685: The position aware SpanScorer has become the default scorer for Highlighting. The SpanScorer implementation has replaced QueryScorer and the old term highlighting QueryScorer has been renamed to QueryTermScorer. Multi-term queries are also now expanded by default. If you were previously rewriting the query for multi-term query highlighting, you should no longer do that (unless you switch to using QueryTermScorer). The SpanScorer API (now QueryScorer) has also been improved to more closely match the API of the previous QueryScorer implementation.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@800796 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2009-08-04 13:56:11 +00:00
parent b155258203
commit 10b41d2dce
12 changed files with 499 additions and 612 deletions

View File

@ -122,6 +122,15 @@ New features
14. Added web-based demo of functionality in contrib's XML Query Parser 14. Added web-based demo of functionality in contrib's XML Query Parser
packaged as War file (Mark Harwood) packaged as War file (Mark Harwood)
15. LUCENE-1685: The position aware SpanScorer has become the default scorer
for Highlighting. The SpanScorer implementation has replaced QueryScorer
and the old term highlighting QueryScorer has been renamed to
QueryTermScorer. Multi-term queries are also now expanded by default. If
you were previously rewriting the query for multi-term query highlighting,
you should no longer do that (unless you switch to using QueryTermScorer).
The SpanScorer API (now QueryScorer) has also been improved to more closely
match the API of the previous QueryScorer implementation. (Mark Miller)
Optimizations Optimizations

View File

@ -38,7 +38,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment; import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.search.highlight.TokenSources;
@ -242,7 +242,7 @@ public abstract class ReadTask extends PerfTask {
} }
protected Highlighter getHighlighter(Query q){ protected Highlighter getHighlighter(Query q){
return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q)); return new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(q));
} }
/** /**

View File

@ -223,7 +223,10 @@ public class Highlighter
tokenStream.reset(); tokenStream.reset();
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size()); TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
fragmentScorer.init(tokenStream); TokenStream newStream = fragmentScorer.init(tokenStream);
if(newStream != null) {
tokenStream = newStream;
}
fragmentScorer.startFragment(currentFrag); fragmentScorer.startFragment(currentFrag);
docFrags.add(currentFrag); docFrags.add(currentFrag);

View File

@ -1,161 +1,227 @@
package org.apache.lucene.search.highlight; package org.apache.lucene.search.highlight;
/** import java.io.IOException;
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
/** /**
* {@link Scorer} implementation which scores text fragments by the number of * {@link Scorer} implementation which scores text fragments by the number of
* unique query terms found. This class uses the {@link QueryTermExtractor} * unique query terms found. This class converts appropriate Querys to
* class to process determine the query terms and their boosts to be used. * SpanQuerys and attempts to score only those terms that participated in
* generating the 'hit' on the document.
*/ */
// TODO: provide option to boost score of fragments near beginning of document
// based on fragment.getFragNum()
public class QueryScorer implements Scorer { public class QueryScorer implements Scorer {
private float totalScore;
TextFragment currentTextFragment = null; private Set foundTerms;
HashSet uniqueTermsInFragment; private Map fieldWeightedSpanTerms;
private float maxTermWeight;
float totalScore = 0; private int position = -1;
float maxTermWeight = 0; private String defaultField;
private HashMap termsToFind;
private TermAttribute termAtt; private TermAttribute termAtt;
private PositionIncrementAttribute posIncAtt;
private boolean expandMultiTermQuery = true;
private Query query;
private String field;
private IndexReader reader;
private boolean skipInitExtractor;
/** /**
* @param query Query to use for highlighting
* *
* @param query a Lucene query (ideally rewritten using query.rewrite before * @throws IOException
* being passed to this class and the searcher)
*/ */
public QueryScorer(Query query) { public QueryScorer(Query query) {
this(QueryTermExtractor.getTerms(query)); init(query, null, null, true);
} }
/** /**
* * @param query Query to use for highlighting
* @param query a Lucene query (ideally rewritten using query.rewrite before * @param field Field to highlight - pass null to ignore fields
* being passed to this class and the searcher) * @throws IOException
* @param fieldName the Field name which is used to match Query terms
*/ */
public QueryScorer(Query query, String fieldName) { public QueryScorer(Query query, String field) {
this(QueryTermExtractor.getTerms(query, false, fieldName)); init(query, field, null, true);
} }
/** /**
* @param query Query to use for highlighting
* @param field Field to highlight - pass null to ignore fields
* *
* @param query a Lucene query (ideally rewritten using query.rewrite before * @param reader
* being passed to this class and the searcher) * @throws IOException
* @param reader used to compute IDF which can be used to a) score selected
* fragments better b) use graded highlights eg set font color
* intensity
* @param fieldName the field on which Inverse Document Frequency (IDF)
* calculations are based
*/ */
public QueryScorer(Query query, IndexReader reader, String fieldName) { public QueryScorer(Query query, IndexReader reader, String field) {
this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName)); init(query, field, reader, true);
} }
public QueryScorer(WeightedTerm[] weightedTerms) { /**
termsToFind = new HashMap(); * As above, but with ability to pass in an <tt>IndexReader</tt>
*/
public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
throws IOException {
this.defaultField = defaultField.intern();
init(query, field, reader, true);
}
/**
* @param defaultField - The default field for queries with the field name unspecified
*/
public QueryScorer(Query query, String field, String defaultField) {
this.defaultField = defaultField.intern();
init(query, field, null, true);
}
/**
* @param weightedTerms
*/
public QueryScorer(WeightedSpanTerm[] weightedTerms) {
this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
for (int i = 0; i < weightedTerms.length; i++) { for (int i = 0; i < weightedTerms.length; i++) {
WeightedTerm existingTerm = (WeightedTerm) termsToFind WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
.get(weightedTerms[i].term);
if ((existingTerm == null) if ((existingTerm == null) ||
|| (existingTerm.weight < weightedTerms[i].weight)) { (existingTerm.weight < weightedTerms[i].weight)) {
// if a term is defined more than once, always use the highest scoring // if a term is defined more than once, always use the highest
// weight // scoring weight
termsToFind.put(weightedTerms[i].term, weightedTerms[i]); fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight()); maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
} }
} }
} skipInitExtractor = true;
/* (non-Javadoc)
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
*/
public void init(TokenStream tokenStream) {
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
} }
/* /*
* (non-Javadoc) * (non-Javadoc)
* *
* @see
* org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
* .lucene.search.highlight.TextFragment)
*/
public void startFragment(TextFragment newFragment) {
uniqueTermsInFragment = new HashSet();
currentTextFragment = newFragment;
totalScore = 0;
}
/* (non-Javadoc)
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
*/
public float getTokenScore() {
String termText = termAtt.term();
WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
if (queryTerm == null) {
// not a query term - return
return 0;
}
// found a query term - is it unique in this doc?
if (!uniqueTermsInFragment.contains(termText)) {
totalScore += queryTerm.getWeight();
uniqueTermsInFragment.add(termText);
}
return queryTerm.getWeight();
}
/* (non-Javadoc)
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore() * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
*/ */
public float getFragmentScore() { public float getFragmentScore() {
return totalScore; return totalScore;
} }
/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
*/
public void allFragmentsProcessed() {
// this class has no special operations to perform at end of processing
}
/** /**
* *
* @return The highest weighted term (useful for passing to GradientFormatter * @return The highest weighted term (useful for passing to
* to set top end of coloring scale. * GradientFormatter to set top end of coloring scale.
*/ */
public float getMaxTermWeight() { public float getMaxTermWeight() {
return maxTermWeight; return maxTermWeight;
} }
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
* int)
*/
public float getTokenScore() {
position += posIncAtt.getPositionIncrement();
String termText = termAtt.term();
WeightedSpanTerm weightedSpanTerm;
if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
termText)) == null) {
return 0;
}
if (weightedSpanTerm.positionSensitive &&
!weightedSpanTerm.checkPosition(position)) {
return 0;
}
float score = weightedSpanTerm.getWeight();
// found a query term - is it unique in this doc?
if (!foundTerms.contains(termText)) {
totalScore += score;
foundTerms.add(termText);
}
return score;
}
public TokenStream init(TokenStream tokenStream) throws IOException {
position = -1;
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class);
if(!skipInitExtractor) {
if(fieldWeightedSpanTerms != null) {
fieldWeightedSpanTerms.clear();
}
return initExtractor(tokenStream);
}
return null;
}
/**
* Retrieve the WeightedSpanTerm for the specified token. Useful for passing
* Span information to a Fragmenter.
*
* @param token
* @return WeightedSpanTerm for token
*/
public WeightedSpanTerm getWeightedSpanTerm(String token) {
return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
}
/**
* @param query
* @param field
* @param tokenStream
* @param reader
* @throws IOException
*/
private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
this.reader = reader;
this.expandMultiTermQuery = expandMultiTermQuery;
this.query = query;
this.field = field;
}
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
: new WeightedSpanTermExtractor(defaultField);
qse.setExpandMultiTermQuery(expandMultiTermQuery);
if (reader == null) {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
tokenStream, field);
} else {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
tokenStream, field, reader);
}
if(qse.isCachedTokenStream()) {
return qse.getTokenStream();
}
return null;
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
*/
public void startFragment(TextFragment newFragment) {
foundTerms = new HashSet();
totalScore = 0;
}
public boolean isExpandMultiTermQuery() {
return expandMultiTermQuery;
}
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
this.expandMultiTermQuery = expandMultiTermQuery;
}
} }

View File

@ -0,0 +1,162 @@
package org.apache.lucene.search.highlight;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.HashSet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
/**
 * A {@link Scorer} that rates a text fragment by the query terms it contains,
 * adding each distinct term's weight to the fragment score at most once per
 * fragment. When constructed from a {@link Query}, the terms and their
 * weights are obtained through {@link QueryTermExtractor}.
 */
// TODO: provide option to boost score of fragments near beginning of document
// based on fragment.getFragNum()
public class QueryTermScorer implements Scorer {
  TextFragment currentTextFragment = null;
  HashSet uniqueTermsInFragment;

  float totalScore = 0;
  float maxTermWeight = 0;
  private HashMap termsToFind;

  private TermAttribute termAtt;

  /**
   * Scores against every term extracted from the query.
   *
   * @param query a Lucene query (ideally rewritten using query.rewrite before
   *        being passed to this class and the searcher)
   */
  public QueryTermScorer(Query query) {
    this(QueryTermExtractor.getTerms(query));
  }

  /**
   * Scores against the query terms extracted for a single field.
   *
   * @param query a Lucene query (ideally rewritten using query.rewrite before
   *        being passed to this class and the searcher)
   * @param fieldName the Field name which is used to match Query terms
   */
  public QueryTermScorer(Query query, String fieldName) {
    this(QueryTermExtractor.getTerms(query, false, fieldName));
  }

  /**
   * Scores against IDF-weighted query terms.
   *
   * @param query a Lucene query (ideally rewritten using query.rewrite before
   *        being passed to this class and the searcher)
   * @param reader used to compute IDF which can be used to a) score selected
   *        fragments better b) use graded highlights eg set font color
   *        intensity
   * @param fieldName the field on which Inverse Document Frequency (IDF)
   *        calculations are based
   */
  public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
    this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
  }

  /**
   * Scores against an explicit list of weighted terms. If the same term text
   * occurs more than once, the entry with the highest weight is the one kept.
   *
   * @param weightedTerms the terms to look for, with their weights
   */
  public QueryTermScorer(WeightedTerm[] weightedTerms) {
    termsToFind = new HashMap();
    for (int idx = 0; idx < weightedTerms.length; idx++) {
      WeightedTerm candidate = weightedTerms[idx];
      WeightedTerm known = (WeightedTerm) termsToFind.get(candidate.term);
      // keep the entry already stored unless the candidate outweighs it
      if (known != null && !(known.weight < candidate.weight)) {
        continue;
      }
      termsToFind.put(candidate.term, candidate);
      maxTermWeight = Math.max(maxTermWeight, candidate.getWeight());
    }
  }

  /**
   * Captures the stream's term attribute so that {@link #getTokenScore()} can
   * read the text of the current token.
   *
   * @param tokenStream the stream about to be scored
   * @return always <code>null</code>; this scorer supplies no replacement
   *         stream
   * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
   */
  public TokenStream init(TokenStream tokenStream) {
    termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
    return null;
  }

  /**
   * Begins scoring a new fragment: forgets the terms seen so far and zeroes
   * the running fragment score.
   *
   * @param newFragment the fragment that is about to be scored
   * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
   */
  public void startFragment(TextFragment newFragment) {
    totalScore = 0;
    currentTextFragment = newFragment;
    uniqueTermsInFragment = new HashSet();
  }

  /**
   * Scores the current token of the stream handed to
   * {@link #init(TokenStream)}.
   *
   * @return the weight of the matching query term, or 0 when the token is not
   *         a query term
   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
   */
  public float getTokenScore() {
    String text = termAtt.term();
    WeightedTerm match = (WeightedTerm) termsToFind.get(text);
    if (match != null) {
      // only the first sighting of a term in this fragment adds to its score
      if (uniqueTermsInFragment.add(text)) {
        totalScore += match.getWeight();
      }
      return match.getWeight();
    }
    // not a query term
    return 0;
  }

  /**
   * @return the score accumulated since the last call to
   *         {@link #startFragment(TextFragment)}
   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
   */
  public float getFragmentScore() {
    return totalScore;
  }

  /**
   * Does nothing: this class has no special operations to perform at end of
   * processing.
   */
  public void allFragmentsProcessed() {
    // intentionally empty
  }

  /**
   *
   * @return The highest weighted term (useful for passing to GradientFormatter
   *         to set top end of coloring scale.
   */
  public float getMaxTermWeight() {
    return maxTermWeight;
  }
}

View File

@ -17,6 +17,8 @@ package org.apache.lucene.search.highlight;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** /**
@ -30,8 +32,9 @@ public interface Scorer {
* getTokenScore(). * getTokenScore().
* *
* @param tokenStream * @param tokenStream
* @throws IOException
*/ */
public void init(TokenStream tokenStream); public TokenStream init(TokenStream tokenStream) throws IOException;
/** /**
* called when a new fragment is started for consideration * called when a new fragment is started for consideration

View File

@ -34,7 +34,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
private int fragmentSize; private int fragmentSize;
private int currentNumFrags; private int currentNumFrags;
private int position = -1; private int position = -1;
private SpanScorer spanScorer; private QueryScorer queryScorer;
private int waitForPos = -1; private int waitForPos = -1;
private int textSize; private int textSize;
private TermAttribute termAtt; private TermAttribute termAtt;
@ -42,19 +42,19 @@ public class SimpleSpanFragmenter implements Fragmenter {
private OffsetAttribute offsetAtt; private OffsetAttribute offsetAtt;
/** /**
* @param spanscorer SpanScorer that was used to score hits * @param queryScorer QueryScorer that was used to score hits
*/ */
public SimpleSpanFragmenter(SpanScorer spanscorer) { public SimpleSpanFragmenter(QueryScorer queryScorer) {
this(spanscorer, DEFAULT_FRAGMENT_SIZE); this(queryScorer, DEFAULT_FRAGMENT_SIZE);
} }
/** /**
* @param spanscorer SpanScorer that was used to score hits * @param queryScorer QueryScorer that was used to score hits
* @param fragmentSize size in bytes of each fragment * @param fragmentSize size in bytes of each fragment
*/ */
public SimpleSpanFragmenter(SpanScorer spanscorer, int fragmentSize) { public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
this.fragmentSize = fragmentSize; this.fragmentSize = fragmentSize;
this.spanScorer = spanscorer; this.queryScorer = queryScorer;
} }
/* (non-Javadoc) /* (non-Javadoc)
@ -69,7 +69,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
return false; return false;
} }
WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(termAtt.term()); WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
if (wSpanTerm != null) { if (wSpanTerm != null) {
List positionSpans = wSpanTerm.getPositionSpans(); List positionSpans = wSpanTerm.getPositionSpans();

View File

@ -1,288 +0,0 @@
package org.apache.lucene.search.highlight;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
/**
 * {@link Scorer} implementation which scores text fragments by the number of
 * unique query terms found. This class converts appropriate Querys to
 * SpanQuerys and attempts to score only those terms that participated in
 * generating the 'hit' on the document.
 */
public class SpanScorer implements Scorer {
  private float totalScore;
  private Set foundTerms;
  private Map fieldWeightedSpanTerms;
  private float maxTermWeight;
  // running token position; advanced by each token's position increment
  private int position = -1;
  private String defaultField;
  private TermAttribute termAtt;
  private PositionIncrementAttribute posIncAtt;
  private static boolean highlightCnstScrRngQuery;

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter) throws IOException {
    init(query, field, cachingTokenFilter, null, false);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param expandMultiTermQuery
   *          rewrite multi-term queries against a single doc memory index to
   *          create boolean queries
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, boolean expandMultiTermQuery) throws IOException {
    init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param reader
   *          when non-null, terms are extracted with scores computed against
   *          this reader
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, IndexReader reader)
      throws IOException {
    init(query, field, cachingTokenFilter, reader, false);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param reader
   *          when non-null, terms are extracted with scores computed against
   *          this reader
   * @param expandMultiTermQuery
   *          rewrite multi-term queries against a single doc memory index to
   *          create boolean queries
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
      throws IOException {
    init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param reader
   *          when non-null, terms are extracted with scores computed against
   *          this reader
   * @param defaultField
   *          The default field for queries with the field name unspecified;
   *          may be null
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField)
      throws IOException {
    // must be assigned before init(...), which reads it to pick the extractor
    this.defaultField = internIfPresent(defaultField);
    init(query, field, cachingTokenFilter, reader, false);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param reader
   *          when non-null, terms are extracted with scores computed against
   *          this reader
   * @param defaultField
   *          The default field for queries with the field name unspecified;
   *          may be null
   * @param expandMultiTermQuery
   *          rewrite multi-term queries against a single doc memory index to
   *          create boolean queries
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField, boolean expandMultiTermQuery)
      throws IOException {
    // must be assigned before init(...), which reads it to pick the extractor
    this.defaultField = internIfPresent(defaultField);
    init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param defaultField - The default field for queries with the field name
   *          unspecified; may be null
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, String defaultField) throws IOException {
    // must be assigned before init(...), which reads it to pick the extractor
    this.defaultField = internIfPresent(defaultField);
    init(query, field, cachingTokenFilter, null, false);
  }

  /**
   * @param query
   *          Query to use for highlighting
   * @param field
   *          Field to highlight - pass null to ignore fields
   * @param cachingTokenFilter
   *          of source text to be highlighted
   * @param defaultField - The default field for queries with the field name
   *          unspecified; may be null
   * @param expandMultiTermQuery
   *          rewrite multi-term queries against a single doc memory index to
   *          create boolean queries
   * @throws IOException
   */
  public SpanScorer(Query query, String field,
      CachingTokenFilter cachingTokenFilter, String defaultField, boolean expandMultiTermQuery) throws IOException {
    // must be assigned before init(...), which reads it to pick the extractor
    this.defaultField = internIfPresent(defaultField);
    init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
  }

  /**
   * Builds a scorer from pre-computed terms. If the same term text occurs
   * more than once, the entry with the highest weight is the one kept.
   *
   * @param weightedTerms the terms to score against
   */
  public SpanScorer(WeightedSpanTerm[] weightedTerms) {
    this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);

    for (int i = 0; i < weightedTerms.length; i++) {
      WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);

      if ((existingTerm == null) ||
            (existingTerm.weight < weightedTerms[i].weight)) {
        // if a term is defined more than once, always use the highest
        // scoring weight
        fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
        maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
      }
    }
  }

  /**
   * @return the score accumulated since the last call to
   *         {@link #startFragment(TextFragment)}
   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
   */
  public float getFragmentScore() {
    return totalScore;
  }

  /**
   *
   * @return The highest weighted term (useful for passing to
   *         GradientFormatter to set top end of coloring scale.
   */
  public float getMaxTermWeight() {
    return maxTermWeight;
  }

  /**
   * Scores the current token of the stream handed to
   * {@link #init(TokenStream)}. Both {@link #init(TokenStream)} and
   * {@link #startFragment(TextFragment)} must have been called first, since
   * they set up the attributes and the found-terms set read here.
   *
   * @return the weight of the matching term, or 0 when the token is not a
   *         known term or fails the term's position check
   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
   */
  public float getTokenScore() {
    // advance even for non-matching tokens so later position checks line up
    position += posIncAtt.getPositionIncrement();
    String termText = termAtt.term();

    WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(termText);
    if (weightedSpanTerm == null) {
      return 0;
    }

    if (weightedSpanTerm.positionSensitive &&
          !weightedSpanTerm.checkPosition(position)) {
      return 0;
    }

    float score = weightedSpanTerm.getWeight();

    // found a query term - is it unique in this fragment?
    if (!foundTerms.contains(termText)) {
      totalScore += score;
      foundTerms.add(termText);
    }

    return score;
  }

  /**
   * Captures the term and position-increment attributes of the stream that
   * is about to be scored.
   *
   * @param tokenStream the stream whose tokens will be scored
   * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
   */
  public void init(TokenStream tokenStream) {
    termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
    posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class);
  }

  /**
   * Retrieve the WeightedSpanTerm for the specified token. Useful for passing
   * Span information to a Fragmenter.
   *
   * @param token
   * @return WeightedSpanTerm for token
   */
  public WeightedSpanTerm getWeightedSpanTerm(String token) {
    return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
  }

  /**
   * Extracts the weighted span terms for the query. The scored extraction is
   * used when a reader is supplied, the unscored one otherwise.
   *
   * @param query Query to use for highlighting
   * @param field Field to highlight - null to ignore fields
   * @param cachingTokenFilter of source text to be highlighted
   * @param reader may be null
   * @param expandMultiTermQuery passed through to the extractor
   * @throws IOException
   */
  private void init(Query query, String field,
      CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
      throws IOException {
    WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
        : new WeightedSpanTermExtractor(defaultField);

    qse.setHighlightCnstScrRngQuery(highlightCnstScrRngQuery);
    qse.setExpandMultiTermQuery(expandMultiTermQuery);
    if (reader == null) {
      this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
          cachingTokenFilter, field);
    } else {
      this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
          cachingTokenFilter, field, reader);
    }
  }

  /**
   * Null-safe {@link String#intern()}: a null default field is passed through
   * as null instead of raising a NullPointerException.
   */
  private static String internIfPresent(String value) {
    return value == null ? null : value.intern();
  }

  /**
   * @return whether ConstantScoreRangeQuerys are set to be highlighted
   * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
   *             constructor option to expand MultiTerm queries.
   */
  public static boolean isHighlightCnstScrRngQuery() {
    return highlightCnstScrRngQuery;
  }

  /**
   * If you call Highlighter#getBestFragment() more than once you must reset
   * the SpanScorer between each call.
   */
  public void reset() {
    position = -1;
  }

  /**
   * Turns highlighting of ConstantScoreRangeQuery on/off.
   * ConstantScoreRangeQuerys cannot be highlighted if you rewrite the query
   * first. Must be called before SpanScorer construction.
   *
   * @param highlight whether ConstantScoreRangeQuerys should be highlighted
   * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
   *             constructor option to expand MultiTerm queries.
   */
  public static void setHighlightCnstScrRngQuery(boolean highlight) {
    highlightCnstScrRngQuery = highlight;
  }

  /**
   * Begins scoring a new fragment: forgets the terms seen so far and zeroes
   * the running fragment score.
   *
   * @param newFragment the fragment that is about to be scored
   * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
   */
  public void startFragment(TextFragment newFragment) {
    foundTerms = new HashSet();
    totalScore = 0;
  }
}

View File

@ -57,11 +57,11 @@ import org.apache.lucene.search.spans.Spans;
public class WeightedSpanTermExtractor { public class WeightedSpanTermExtractor {
private String fieldName; private String fieldName;
private CachingTokenFilter cachedTokenFilter; private TokenStream tokenStream;
private Map readers = new HashMap(10); // Map<String, IndexReader> private Map readers = new HashMap(10); // Map<String, IndexReader>
private String defaultField; private String defaultField;
private boolean highlightCnstScrRngQuery;
private boolean expandMultiTermQuery; private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
public WeightedSpanTermExtractor() { public WeightedSpanTermExtractor() {
} }
@ -131,7 +131,7 @@ public class WeightedSpanTermExtractor {
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract((Query) iterator.next(), terms); extract((Query) iterator.next(), terms);
} }
} else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) { } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
MultiTermQuery mtq = ((MultiTermQuery)query); MultiTermQuery mtq = ((MultiTermQuery)query);
if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
mtq = copyMultiTermQuery(mtq); mtq = copyMultiTermQuery(mtq);
@ -241,7 +241,6 @@ public class WeightedSpanTermExtractor {
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1)); spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
} }
cachedTokenFilter.reset();
} }
if (spanPositions.size() == 0) { if (spanPositions.size() == 0) {
@ -301,15 +300,21 @@ public class WeightedSpanTermExtractor {
return rv; return rv;
} }
private IndexReader getReaderForField(String field) { private IndexReader getReaderForField(String field) throws IOException {
if(!cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
tokenStream = new CachingTokenFilter(tokenStream);
cachedTokenStream = true;
}
IndexReader reader = (IndexReader) readers.get(field); IndexReader reader = (IndexReader) readers.get(field);
if (reader == null) { if (reader == null) {
MemoryIndex indexer = new MemoryIndex(); MemoryIndex indexer = new MemoryIndex();
indexer.addField(field, cachedTokenFilter); indexer.addField(field, tokenStream);
tokenStream.reset();
IndexSearcher searcher = indexer.createSearcher(); IndexSearcher searcher = indexer.createSearcher();
reader = searcher.getIndexReader(); reader = searcher.getIndexReader();
readers.put(field, reader); readers.put(field, reader);
} }
return reader; return reader;
} }
@ -328,7 +333,7 @@ public class WeightedSpanTermExtractor {
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter) public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter)
throws IOException { throws IOException {
this.fieldName = null; this.fieldName = null;
this.cachedTokenFilter = cachingTokenFilter; this.tokenStream = cachingTokenFilter;
Map terms = new PositionCheckingMap(); Map terms = new PositionCheckingMap();
try { try {
@ -354,14 +359,14 @@ public class WeightedSpanTermExtractor {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter, public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
String fieldName) throws IOException { String fieldName) throws IOException {
if (fieldName != null) { if (fieldName != null) {
this.fieldName = fieldName.intern(); this.fieldName = fieldName.intern();
} }
Map terms = new PositionCheckingMap(); Map terms = new PositionCheckingMap();
this.cachedTokenFilter = cachingTokenFilter; this.tokenStream = tokenStream;
try { try {
extract(query, terms); extract(query, terms);
} finally { } finally {
@ -391,7 +396,7 @@ public class WeightedSpanTermExtractor {
public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
IndexReader reader) throws IOException { IndexReader reader) throws IOException {
this.fieldName = fieldName; this.fieldName = fieldName;
this.cachedTokenFilter = new CachingTokenFilter(tokenStream); this.tokenStream = tokenStream;
Map terms = new PositionCheckingMap(); Map terms = new PositionCheckingMap();
extract(query, terms); extract(query, terms);
@ -420,23 +425,6 @@ public class WeightedSpanTermExtractor {
return terms; return terms;
} }
/**
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use
* getExpandMultiTermQuery instead.
*/
public boolean isHighlightCnstScrRngQuery() {
return highlightCnstScrRngQuery;
}
/**
* @param highlightCnstScrRngQuery
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
* setExpandMultiTermQuery option.
*/
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
}
/** /**
* This class makes sure that if both position sensitive and insensitive * This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins. * versions of the same term are added, the position insensitive one wins.
@ -495,4 +483,12 @@ public class WeightedSpanTermExtractor {
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) { public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
this.expandMultiTermQuery = expandMultiTermQuery; this.expandMultiTermQuery = expandMultiTermQuery;
} }
public boolean isCachedTokenStream() {
return cachedTokenStream;
}
public TokenStream getTokenStream() {
return tokenStream;
}
} }

View File

@ -5,7 +5,7 @@ The highlight package contains classes to provide "keyword in context" features
typically used to highlight search terms in the text of results pages. typically used to highlight search terms in the text of results pages.
The Highlighter class is the central component and can be used to extract the The Highlighter class is the central component and can be used to extract the
most interesting sections of a piece of text and highlight them, with the help of most interesting sections of a piece of text and highlight them, with the help of
Fragmenter, FragmentScorer, Formatter classes. Fragmenter, fragment Scorer, and Formatter classes.
<h2>Example Usage</h2> <h2>Example Usage</h2>
@ -14,14 +14,16 @@ Fragmenter, FragmentScorer, Formatter classes.
IndexSearcher searcher = new IndexSearcher(directory); IndexSearcher searcher = new IndexSearcher(directory);
QueryParser parser = new QueryParser("notv", analyzer); QueryParser parser = new QueryParser("notv", analyzer);
Query query = parser.parse("million"); Query query = parser.parse("million");
//query = query.rewrite(reader); //required to expand search terms
Hits hits = searcher.search(query); TopDocs hits = searcher.search(query, 10);
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
String text = hits.doc(i).get("notv"); int id = hits.scoreDocs[i].doc;
TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "notv", analyzer); Document doc = searcher.doc(id);
String text = doc.get("notv");
TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "notv", analyzer);
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
for (int j = 0; j < frag.length; j++) { for (int j = 0; j < frag.length; j++) {
if ((frag[j] != null) && (frag[j].getScore() > 0)) { if ((frag[j] != null) && (frag[j].getScore() > 0)) {
@ -29,8 +31,8 @@ Fragmenter, FragmentScorer, Formatter classes.
} }
} }
//Term vector //Term vector
text = hits.doc(i).get("tv"); text = doc.get("tv");
tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "tv", analyzer); tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.scoreDocs[i].doc, "tv", analyzer);
frag = highlighter.getBestTextFragments(tokenStream, text, false, 10); frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
for (int j = 0; j < frag.length; j++) { for (int j = 0; j < frag.length; j++) {
if ((frag[j] != null) && (frag[j].getScore() > 0)) { if ((frag[j] != null) && (frag[j].getScore() > 0)) {

View File

@ -1,61 +0,0 @@
<html>
<body>
<p>
The spanscorer classes provide the Highlighter with the ability
to only highlight the Tokens that contributed to a query match.
The SpanScorer class is the central component and it will attempt to score Terms
based on whether they actually participated in scoring the Query.
</p>
<p>
The implementation is very similar to QueryScorer in that WeightedSpanTerms are extracted
from the given Query and then placed in a Map. During Token scoring, Terms found in
the Map return a score equal to their weight. The added wrinkle is that when terms are
extracted, the sub-queries that make up the Query are converted to SpanQuery instances and
SpanQuery.getSpans() is applied to a MemoryIndex containing the TokenStream of the text to
be highlighted if the sub-query is position sensitive. The start and end positions of the
matching Spans are recorded with the respective WeightedSpanTerms and these positions are
then used to filter possible Token matches during scoring.
</p>
<p>
Unlike the QueryScorer, you do not want to rewrite the query first with the SpanScorer for
multi-term query handling, i.e. wildcard, fuzzy, and range queries.
The SpanScorer constructors provide an option to enable the highlighting of multi-term queries.
If this option is enabled, the SpanScorer will rewrite the query against a single doc index
containing the doc to be highlighted, rather than against the full index. If you do rewrite the
query first, certain multi-term queries may not highlight correctly.
</p>
<h2>Example Usage</h2>
<pre>
IndexSearcher searcher = new IndexSearcher(ramDir);
Query query = QueryParser.parse("Kenne*", FIELD_NAME, analyzer);
Hits hits = searcher.search(query);
for (int i = 0; i &lt; hits.length(); i++)
{
String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(
FIELD_NAME, new StringReader(text)));
Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream, true));
tokenStream.reset();
// Get 3 best fragments and separate with a "..."
String result = highlighter.getBestFragments(tokenStream, text, 3, "...");
System.out.println(result);
}
</pre>
<p>
If you make a call to any of the getBestFragments() methods more than once, you must call reset() on the SpanScorer
between each call.
</p>
<p>The SpanScorer class has a constructor which can use an IndexReader to derive the IDF (inverse document frequency)
for each term in order to influence the score. This is useful for helping to extract the most significant sections
of a document and in supplying scores used by the GradientFormatter to color significant words more strongly.
The SpanScorer.getMaxWeight method is useful when passed to the GradientFormatter constructor to define the top score
which is associated with the top color.</p>
</body>
</html>

View File

@ -119,7 +119,8 @@ public class HighlighterTest extends TestCase implements Formatter {
searcher = new IndexSearcher(ramDir, false); searcher = new IndexSearcher(ramDir, false);
TopDocs hits = searcher.search(query, 10); TopDocs hits = searcher.search(query, 10);
Highlighter highlighter = new Highlighter(null); QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(scorer);
for (int i = 0; i < hits.scoreDocs.length; i++) { for (int i = 0; i < hits.scoreDocs.length; i++) {
@ -128,14 +129,12 @@ public class HighlighterTest extends TestCase implements Formatter {
TokenStream stream = TokenSources.getAnyTokenStream(searcher TokenStream stream = TokenSources.getAnyTokenStream(searcher
.getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer); .getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
CachingTokenFilter ctf = new CachingTokenFilter(stream);
SpanScorer scorer = new SpanScorer(query, FIELD_NAME, ctf);
// ctf.reset();
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
highlighter.setFragmentScorer(scorer);
highlighter.setTextFragmenter(fragmenter); highlighter.setTextFragmenter(fragmenter);
String fragment = highlighter.getBestFragment(ctf, storedField); String fragment = highlighter.getBestFragment(stream, storedField);
System.out.println(fragment); System.out.println(fragment);
} }
@ -181,10 +180,10 @@ public class HighlighterTest extends TestCase implements Formatter {
fieldName, new StringReader(text))); fieldName, new StringReader(text)));
// Assuming "<B>", "</B>" used to highlight // Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
Highlighter highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName, QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
tokenStream, FIELD_NAME)); Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
tokenStream.reset();
String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)"); String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
return rv.length() == 0 ? text : rv; return rv.length() == 0 ? text : rv;
} }
@ -194,13 +193,14 @@ public class HighlighterTest extends TestCase implements Formatter {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(scorer);
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
new StringReader(text))); new StringReader(text));
Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -225,9 +225,10 @@ public class HighlighterTest extends TestCase implements Formatter {
Analyzer analyzer = new WhitespaceAnalyzer(); Analyzer analyzer = new WhitespaceAnalyzer();
QueryParser qp = new QueryParser(f1, analyzer); QueryParser qp = new QueryParser(f1, analyzer);
Query query = qp.parse(q); Query query = qp.parse(q);
CachingTokenFilter stream = new CachingTokenFilter(analyzer.tokenStream(f1,
new StringReader(content))); QueryScorer scorer = new QueryScorer(query, f1);
Scorer scorer = new SpanScorer(query, f1, stream, false); scorer.setExpandMultiTermQuery(false);
Highlighter h = new Highlighter(this, scorer); Highlighter h = new Highlighter(this, scorer);
h.getBestFragment(analyzer, f1, content); h.getBestFragment(analyzer, f1, content);
@ -241,14 +242,14 @@ public class HighlighterTest extends TestCase implements Formatter {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
new StringReader(text)));
Highlighter highlighter = new Highlighter(this,
new SpanScorer(query, FIELD_NAME, tokenStream));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -264,14 +265,13 @@ public class HighlighterTest extends TestCase implements Formatter {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this,scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
new StringReader(text)));
Highlighter highlighter = new Highlighter(this,
new SpanScorer(query, FIELD_NAME, tokenStream));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -291,10 +291,10 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
new StringReader(text))); new StringReader(text)));
Highlighter highlighter = new Highlighter(this, QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
new SpanScorer(query, FIELD_NAME, tokenStream)); Highlighter highlighter = new Highlighter(this, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -310,14 +310,15 @@ public class HighlighterTest extends TestCase implements Formatter {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
.tokenStream(FIELD_NAME, new StringReader(text))); .tokenStream(FIELD_NAME, new StringReader(text)));
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
Highlighter highlighter = new Highlighter(this, spanscorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 5));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, String result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, "..."); maxNumFragmentsRequired, "...");
@ -329,14 +330,15 @@ public class HighlighterTest extends TestCase implements Formatter {
maxNumFragmentsRequired = 2; maxNumFragmentsRequired = 2;
scorer = new QueryScorer(query, FIELD_NAME);
highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
.tokenStream(FIELD_NAME, new StringReader(text))); .tokenStream(FIELD_NAME, new StringReader(text)));
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
Highlighter highlighter = new Highlighter(this, spanscorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 20));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, String result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, "..."); maxNumFragmentsRequired, "...");
@ -351,14 +353,15 @@ public class HighlighterTest extends TestCase implements Formatter {
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this,scorer);
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
new StringReader(text))); new StringReader(text)));
Highlighter highlighter = new Highlighter(this,
new SpanScorer(query, FIELD_NAME, tokenStream));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -421,7 +424,7 @@ public class HighlighterTest extends TestCase implements Formatter {
public void testSimpleHighlighter() throws Exception { public void testSimpleHighlighter() throws Exception {
doSearching("Kennedy"); doSearching("Kennedy");
Highlighter highlighter = new Highlighter(new QueryScorer(query)); Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
@ -579,19 +582,16 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME); String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
SpanScorer scorer = null; QueryScorer scorer = null;
TokenStream tokenStream = null; TokenStream tokenStream = null;
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME, tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text))); new StringReader(text)));
SpanScorer.setHighlightCnstScrRngQuery(true); scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
((CachingTokenFilter) tokenStream).reset();
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@ -619,18 +619,16 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME); String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
SpanScorer scorer = null; QueryScorer scorer = null;
TokenStream tokenStream = null; TokenStream tokenStream = null;
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME, tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text))); new StringReader(text)));
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream, true); scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
((CachingTokenFilter) tokenStream).reset();
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@ -650,18 +648,16 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME); String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
SpanScorer scorer = null; QueryScorer scorer = null;
TokenStream tokenStream = null; TokenStream tokenStream = null;
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME, tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text))); new StringReader(text)));
scorer = new SpanScorer(query, null, (CachingTokenFilter) tokenStream, true); scorer = new QueryScorer(query, null);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
((CachingTokenFilter) tokenStream).reset();
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@ -681,18 +677,16 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME); String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2; int maxNumFragmentsRequired = 2;
String fragmentSeparator = "..."; String fragmentSeparator = "...";
SpanScorer scorer = null; QueryScorer scorer = null;
TokenStream tokenStream = null; TokenStream tokenStream = null;
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME, tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text))); new StringReader(text)));
scorer = new SpanScorer(query, "random_field", (CachingTokenFilter) tokenStream, HighlighterTest.FIELD_NAME, true); scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer); Highlighter highlighter = new Highlighter(this, scorer);
((CachingTokenFilter) tokenStream).reset();
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@ -744,7 +738,7 @@ public class HighlighterTest extends TestCase implements Formatter {
public void run() throws Exception { public void run() throws Exception {
TermQuery query = new TermQuery(new Term("data", "help")); TermQuery query = new TermQuery(new Term("data", "help"));
Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query)); Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
hg.setTextFragmenter(new NullFragmenter()); hg.setTextFragmenter(new NullFragmenter());
String match = null; String match = null;
@ -900,7 +894,7 @@ public class HighlighterTest extends TestCase implements Formatter {
Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
// Highlighter(new // Highlighter(new
// QueryScorer(wTerms)); // QueryTermScorer(wTerms));
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
highlighter.setTextFragmenter(new SimpleFragmenter(2)); highlighter.setTextFragmenter(new SimpleFragmenter(2));
@ -965,7 +959,7 @@ public class HighlighterTest extends TestCase implements Formatter {
public void run() throws Exception { public void run() throws Exception {
numHighlights = 0; numHighlights = 0;
doSearching("Kennedy"); doSearching("Kennedy");
// new Highlighter(HighlighterTest.this, new QueryScorer(query)); // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
@ -995,7 +989,7 @@ public class HighlighterTest extends TestCase implements Formatter {
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
HighlighterTest.this);// new Highlighter(this, new HighlighterTest.this);// new Highlighter(this, new
// QueryScorer(query)); // QueryTermScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20)); highlighter.setTextFragmenter(new SimpleFragmenter(20));
String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10); String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
@ -1027,7 +1021,7 @@ public class HighlighterTest extends TestCase implements Formatter {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
HighlighterTest.this);// new Highlighter(this, new HighlighterTest.this);// new Highlighter(this, new
// QueryScorer(query)); // QueryTermScorer(query));
highlighter.setMaxDocBytesToAnalyze(30); highlighter.setMaxDocBytesToAnalyze(30);
highlighter.getBestFragment(tokenStream, texts[0]); highlighter.getBestFragment(tokenStream, texts[0]);
@ -1062,7 +1056,7 @@ public class HighlighterTest extends TestCase implements Formatter {
Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream( Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream(
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm, "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
// new // new
// QueryScorer(query)); // QueryTermScorer(query));
hg.setTextFragmenter(new NullFragmenter()); hg.setTextFragmenter(new NullFragmenter());
hg.setMaxDocBytesToAnalyze(100); hg.setMaxDocBytesToAnalyze(100);
match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString()); match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
@ -1114,7 +1108,6 @@ public class HighlighterTest extends TestCase implements Formatter {
public void run() throws Exception { public void run() throws Exception {
numHighlights = 0; numHighlights = 0;
SpanScorer.setHighlightCnstScrRngQuery(false);
// test to show how rewritten query can still be used // test to show how rewritten query can still be used
searcher = new IndexSearcher(ramDir); searcher = new IndexSearcher(ramDir);
Analyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new StandardAnalyzer();
@ -1136,12 +1129,14 @@ public class HighlighterTest extends TestCase implements Formatter {
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); TokenStream tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, new StringReader(text)));
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
HighlighterTest.this);
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
String highlightedText = highlighter.getBestFragments(tokenStream, text, String highlightedText = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, "..."); maxNumFragmentsRequired, "...");
System.out.println(highlightedText); System.out.println(highlightedText);
} }
// We expect to have zero highlights if the query is multi-terms and is // We expect to have zero highlights if the query is multi-terms and is
@ -1198,8 +1193,8 @@ public class HighlighterTest extends TestCase implements Formatter {
return 1; return 1;
} }
public void init(TokenStream tokenStream) { public TokenStream init(TokenStream tokenStream) {
return null;
} }
}); });
highlighter.setTextFragmenter(new SimpleFragmenter(2000)); highlighter.setTextFragmenter(new SimpleFragmenter(2000));
@ -1266,7 +1261,7 @@ public class HighlighterTest extends TestCase implements Formatter {
// create an instance of the highlighter with the tags used to surround // create an instance of the highlighter with the tags used to surround
// highlighted text // highlighted text
Highlighter highlighter = new Highlighter(this, new QueryScorer(query)); Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
for (int i = 0; i < hits.length(); i++) { for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
@ -1293,9 +1288,10 @@ public class HighlighterTest extends TestCase implements Formatter {
if (mode == this.SPAN) { if (mode == this.SPAN) {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText)); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
CachingTokenFilter ctf = new CachingTokenFilter(tokenStream); CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
fieldSpecificScorer = new SpanScorer(query, FIELD_NAME, ctf); fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
} else if (mode == this.STANDARD) { } else if (mode == this.STANDARD) {
fieldSpecificScorer = new QueryScorer(query, "contents"); fieldSpecificScorer = new QueryTermScorer(query, "contents");
} }
Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
fieldSpecificScorer); fieldSpecificScorer);
@ -1308,9 +1304,10 @@ public class HighlighterTest extends TestCase implements Formatter {
if (mode == this.SPAN) { if (mode == this.SPAN) {
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText)); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
CachingTokenFilter ctf = new CachingTokenFilter(tokenStream); CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
fieldInSpecificScorer = new SpanScorer(query, null, ctf); fieldInSpecificScorer = new QueryScorer(query, null);
} else if (mode == this.STANDARD) { } else if (mode == this.STANDARD) {
fieldInSpecificScorer = new QueryScorer(query); fieldInSpecificScorer = new QueryTermScorer(query);
} }
Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(), Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
@ -1535,9 +1532,9 @@ public class HighlighterTest extends TestCase implements Formatter {
Query query = parser.parse( q ); Query query = parser.parse( q );
IndexSearcher searcher = new IndexSearcher( dir ); IndexSearcher searcher = new IndexSearcher( dir );
// This scorer can return negative idf -> null fragment // This scorer can return negative idf -> null fragment
Scorer scorer = new QueryScorer( query, searcher.getIndexReader(), "t_text1" ); Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
// This scorer doesn't use idf (patch version) // This scorer doesn't use idf (patch version)
//Scorer scorer = new QueryScorer( query, "t_text1" ); //Scorer scorer = new QueryTermScorer( query, "t_text1" );
Highlighter h = new Highlighter( scorer ); Highlighter h = new Highlighter( scorer );
TopDocs hits = searcher.search(query, null, 10); TopDocs hits = searcher.search(query, null, 10);
@ -1606,10 +1603,10 @@ public class HighlighterTest extends TestCase implements Formatter {
String text = hits.doc(i).get(FIELD_NAME); String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
new StringReader(text))); new StringReader(text)));
Highlighter highlighter = new Highlighter(this, QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
new SpanScorer(query, FIELD_NAME, tokenStream)); Highlighter highlighter = new Highlighter(this, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(40)); highlighter.setTextFragmenter(new SimpleFragmenter(40));
tokenStream.reset();
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"..."); "...");
@ -1764,33 +1761,33 @@ class SynonymTokenizer extends TokenStream {
int mode = STANDARD; int mode = STANDARD;
Fragmenter frag = new SimpleFragmenter(20); Fragmenter frag = new SimpleFragmenter(20);
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
Formatter formatter) { return getHighlighter(query, fieldName, stream, formatter, true);
if (mode == STANDARD) {
return new Highlighter(formatter, new QueryScorer(query));
} else if (mode == SPAN) {
CachingTokenFilter tokenStream = new CachingTokenFilter(stream);
Highlighter highlighter;
try {
highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName, tokenStream));
tokenStream.reset();
} catch (IOException e) {
throw new RuntimeException(e);
} }
return highlighter; public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
Scorer scorer = null;
if (mode == STANDARD) {
scorer = new QueryTermScorer(query);
} else if (mode == SPAN) {
scorer = new QueryScorer(query, fieldName);
if(!expanMultiTerm) {
((QueryScorer)scorer).setExpandMultiTermQuery(false);
}
} else { } else {
throw new RuntimeException("Unknown highlight mode"); throw new RuntimeException("Unknown highlight mode");
} }
return new Highlighter(formatter, scorer);
} }
Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) { Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
if (mode == STANDARD) { if (mode == STANDARD) {
return new Highlighter(formatter, new QueryScorer(weightedTerms)); return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
} else if (mode == SPAN) { } else if (mode == SPAN) {
Highlighter highlighter; Highlighter highlighter;
highlighter = new Highlighter(formatter, new SpanScorer((WeightedSpanTerm[]) weightedTerms)); highlighter = new Highlighter(formatter, new QueryScorer((WeightedSpanTerm[]) weightedTerms));
return highlighter; return highlighter;
} else { } else {
@ -1815,16 +1812,14 @@ class SynonymTokenizer extends TokenStream {
if (mode == SPAN) { if (mode == SPAN) {
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME, tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text))); new StringReader(text)));
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
(CachingTokenFilter) tokenStream, expandMT);
} else if (mode == STANDARD) { } else if (mode == STANDARD) {
scorer = new QueryScorer(query); scorer = new QueryTermScorer(query);
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
} }
Highlighter highlighter = new Highlighter(formatter, scorer); Highlighter highlighter = new Highlighter(formatter, scorer);
if (mode == SPAN) {
((CachingTokenFilter) tokenStream).reset();
}
highlighter.setTextFragmenter(frag); highlighter.setTextFragmenter(frag);
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,