mirror of https://github.com/apache/lucene.git
LUCENE-1685: The position aware SpanScorer has become the default scorer for Highlighting. The SpanScorer implementation has replaced QueryScorer and the old term highlighting QueryScorer has been renamed to QueryTermScorer. Multi-term queries are also now expanded by default. If you were previously rewriting the query for multi-term query highlighting, you should no longer do that (unless you switch to using QueryTermScorer). The SpanScorer API (now QueryScorer) has also been improved to more closely match the API of the previous QueryScorer implementation.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@800796 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b155258203
commit
10b41d2dce
|
@ -120,7 +120,16 @@ New features
|
|||
"(jo* -john) smyth~". (Mark Harwood via Mark Miller)
|
||||
|
||||
14. Added web-based demo of functionality in contrib's XML Query Parser
|
||||
packaged as War file (Mark Harwood)
|
||||
packaged as War file (Mark Harwood)
|
||||
|
||||
15. LUCENE-1685: The position aware SpanScorer has become the default scorer
|
||||
for Highlighting. The SpanScorer implementation has replaced QueryScorer
|
||||
and the old term highlighting QueryScorer has been renamed to
|
||||
QueryTermScorer. Multi-term queries are also now expanded by default. If
|
||||
you were previously rewriting the query for multi-term query highlighting,
|
||||
you should no longer do that (unless you switch to using QueryTermScorer).
|
||||
The SpanScorer API (now QueryScorer) has also been improved to more closely
|
||||
match the API of the previous QueryScorer implementation. (Mark Miller)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
|
|
@ -38,7 +38,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.QueryTermScorer;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
||||
import org.apache.lucene.search.highlight.TextFragment;
|
||||
import org.apache.lucene.search.highlight.TokenSources;
|
||||
|
@ -242,7 +242,7 @@ public abstract class ReadTask extends PerfTask {
|
|||
}
|
||||
|
||||
protected Highlighter getHighlighter(Query q){
|
||||
return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
|
||||
return new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(q));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -223,7 +223,10 @@ public class Highlighter
|
|||
tokenStream.reset();
|
||||
|
||||
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
|
||||
fragmentScorer.init(tokenStream);
|
||||
TokenStream newStream = fragmentScorer.init(tokenStream);
|
||||
if(newStream != null) {
|
||||
tokenStream = newStream;
|
||||
}
|
||||
fragmentScorer.startFragment(currentFrag);
|
||||
docFrags.add(currentFrag);
|
||||
|
||||
|
|
|
@ -1,161 +1,227 @@
|
|||
package org.apache.lucene.search.highlight;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* {@link Scorer} implementation which scores text fragments by the number of
|
||||
* unique query terms found. This class uses the {@link QueryTermExtractor}
|
||||
* class to process determine the query terms and their boosts to be used.
|
||||
* unique query terms found. This class converts appropriate Querys to
|
||||
* SpanQuerys and attempts to score only those terms that participated in
|
||||
* generating the 'hit' on the document.
|
||||
*/
|
||||
// TODO: provide option to boost score of fragments near beginning of document
|
||||
// based on fragment.getFragNum()
|
||||
public class QueryScorer implements Scorer {
|
||||
|
||||
TextFragment currentTextFragment = null;
|
||||
HashSet uniqueTermsInFragment;
|
||||
|
||||
float totalScore = 0;
|
||||
float maxTermWeight = 0;
|
||||
private HashMap termsToFind;
|
||||
|
||||
private float totalScore;
|
||||
private Set foundTerms;
|
||||
private Map fieldWeightedSpanTerms;
|
||||
private float maxTermWeight;
|
||||
private int position = -1;
|
||||
private String defaultField;
|
||||
private TermAttribute termAtt;
|
||||
private PositionIncrementAttribute posIncAtt;
|
||||
private boolean expandMultiTermQuery = true;
|
||||
private Query query;
|
||||
private String field;
|
||||
private IndexReader reader;
|
||||
private boolean skipInitExtractor;
|
||||
|
||||
/**
|
||||
* @param query Query to use for highlighting
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
* @throws IOException
|
||||
*/
|
||||
public QueryScorer(Query query) {
|
||||
this(QueryTermExtractor.getTerms(query));
|
||||
init(query, null, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
* @param fieldName the Field name which is used to match Query terms
|
||||
* @param query Query to use for highlighting
|
||||
* @param field Field to highlight - pass null to ignore fields
|
||||
* @throws IOException
|
||||
*/
|
||||
public QueryScorer(Query query, String fieldName) {
|
||||
this(QueryTermExtractor.getTerms(query, false, fieldName));
|
||||
public QueryScorer(Query query, String field) {
|
||||
init(query, field, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query Query to use for highlighting
|
||||
* @param field Field to highlight - pass null to ignore fields
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
* @param reader used to compute IDF which can be used to a) score selected
|
||||
* fragments better b) use graded highlights eg set font color
|
||||
* intensity
|
||||
* @param fieldName the field on which Inverse Document Frequency (IDF)
|
||||
* calculations are based
|
||||
* @param reader
|
||||
* @throws IOException
|
||||
*/
|
||||
public QueryScorer(Query query, IndexReader reader, String fieldName) {
|
||||
this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
|
||||
public QueryScorer(Query query, IndexReader reader, String field) {
|
||||
init(query, field, reader, true);
|
||||
}
|
||||
|
||||
public QueryScorer(WeightedTerm[] weightedTerms) {
|
||||
termsToFind = new HashMap();
|
||||
/**
|
||||
* As above, but with ability to pass in an <tt>IndexReader</tt>
|
||||
*/
|
||||
public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
|
||||
throws IOException {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, reader, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param defaultField - The default field for queries with the field name unspecified
|
||||
*/
|
||||
public QueryScorer(Query query, String field, String defaultField) {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param weightedTerms
|
||||
*/
|
||||
public QueryScorer(WeightedSpanTerm[] weightedTerms) {
|
||||
this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
|
||||
|
||||
for (int i = 0; i < weightedTerms.length; i++) {
|
||||
WeightedTerm existingTerm = (WeightedTerm) termsToFind
|
||||
.get(weightedTerms[i].term);
|
||||
if ((existingTerm == null)
|
||||
|| (existingTerm.weight < weightedTerms[i].weight)) {
|
||||
// if a term is defined more than once, always use the highest scoring
|
||||
// weight
|
||||
termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
|
||||
WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
|
||||
|
||||
if ((existingTerm == null) ||
|
||||
(existingTerm.weight < weightedTerms[i].weight)) {
|
||||
// if a term is defined more than once, always use the highest
|
||||
// scoring weight
|
||||
fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
|
||||
maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
|
||||
*/
|
||||
public void init(TokenStream tokenStream) {
|
||||
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
|
||||
skipInitExtractor = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
|
||||
* .lucene.search.highlight.TextFragment)
|
||||
*/
|
||||
public void startFragment(TextFragment newFragment) {
|
||||
uniqueTermsInFragment = new HashSet();
|
||||
currentTextFragment = newFragment;
|
||||
totalScore = 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
|
||||
*/
|
||||
public float getTokenScore() {
|
||||
String termText = termAtt.term();
|
||||
|
||||
WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
|
||||
if (queryTerm == null) {
|
||||
// not a query term - return
|
||||
return 0;
|
||||
}
|
||||
// found a query term - is it unique in this doc?
|
||||
if (!uniqueTermsInFragment.contains(termText)) {
|
||||
totalScore += queryTerm.getWeight();
|
||||
uniqueTermsInFragment.add(termText);
|
||||
}
|
||||
return queryTerm.getWeight();
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
|
||||
*/
|
||||
public float getFragmentScore() {
|
||||
return totalScore;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
|
||||
*/
|
||||
public void allFragmentsProcessed() {
|
||||
// this class has no special operations to perform at end of processing
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return The highest weighted term (useful for passing to GradientFormatter
|
||||
* to set top end of coloring scale.
|
||||
*
|
||||
* @return The highest weighted term (useful for passing to
|
||||
* GradientFormatter to set top end of coloring scale.
|
||||
*/
|
||||
public float getMaxTermWeight() {
|
||||
return maxTermWeight;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
|
||||
* int)
|
||||
*/
|
||||
public float getTokenScore() {
|
||||
position += posIncAtt.getPositionIncrement();
|
||||
String termText = termAtt.term();
|
||||
|
||||
WeightedSpanTerm weightedSpanTerm;
|
||||
|
||||
if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
|
||||
termText)) == null) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (weightedSpanTerm.positionSensitive &&
|
||||
!weightedSpanTerm.checkPosition(position)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
float score = weightedSpanTerm.getWeight();
|
||||
|
||||
// found a query term - is it unique in this doc?
|
||||
if (!foundTerms.contains(termText)) {
|
||||
totalScore += score;
|
||||
foundTerms.add(termText);
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
public TokenStream init(TokenStream tokenStream) throws IOException {
|
||||
position = -1;
|
||||
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
|
||||
posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
if(!skipInitExtractor) {
|
||||
if(fieldWeightedSpanTerms != null) {
|
||||
fieldWeightedSpanTerms.clear();
|
||||
}
|
||||
return initExtractor(tokenStream);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the WeightedSpanTerm for the specified token. Useful for passing
|
||||
* Span information to a Fragmenter.
|
||||
*
|
||||
* @param token
|
||||
* @return WeightedSpanTerm for token
|
||||
*/
|
||||
public WeightedSpanTerm getWeightedSpanTerm(String token) {
|
||||
return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* @param field
|
||||
* @param tokenStream
|
||||
* @param reader
|
||||
* @throws IOException
|
||||
*/
|
||||
private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
|
||||
this.reader = reader;
|
||||
this.expandMultiTermQuery = expandMultiTermQuery;
|
||||
this.query = query;
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
|
||||
WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
|
||||
: new WeightedSpanTermExtractor(defaultField);
|
||||
|
||||
qse.setExpandMultiTermQuery(expandMultiTermQuery);
|
||||
if (reader == null) {
|
||||
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
|
||||
tokenStream, field);
|
||||
} else {
|
||||
this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
|
||||
tokenStream, field, reader);
|
||||
}
|
||||
if(qse.isCachedTokenStream()) {
|
||||
return qse.getTokenStream();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
|
||||
*/
|
||||
public void startFragment(TextFragment newFragment) {
|
||||
foundTerms = new HashSet();
|
||||
totalScore = 0;
|
||||
}
|
||||
|
||||
public boolean isExpandMultiTermQuery() {
|
||||
return expandMultiTermQuery;
|
||||
}
|
||||
|
||||
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
|
||||
this.expandMultiTermQuery = expandMultiTermQuery;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,162 @@
|
|||
package org.apache.lucene.search.highlight;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* {@link Scorer} implementation which scores text fragments by the number of
|
||||
* unique query terms found. This class uses the {@link QueryTermExtractor}
|
||||
* class to process determine the query terms and their boosts to be used.
|
||||
*/
|
||||
// TODO: provide option to boost score of fragments near beginning of document
|
||||
// based on fragment.getFragNum()
|
||||
public class QueryTermScorer implements Scorer {
|
||||
|
||||
TextFragment currentTextFragment = null;
|
||||
HashSet uniqueTermsInFragment;
|
||||
|
||||
float totalScore = 0;
|
||||
float maxTermWeight = 0;
|
||||
private HashMap termsToFind;
|
||||
|
||||
private TermAttribute termAtt;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
*/
|
||||
public QueryTermScorer(Query query) {
|
||||
this(QueryTermExtractor.getTerms(query));
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
* @param fieldName the Field name which is used to match Query terms
|
||||
*/
|
||||
public QueryTermScorer(Query query, String fieldName) {
|
||||
this(QueryTermExtractor.getTerms(query, false, fieldName));
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param query a Lucene query (ideally rewritten using query.rewrite before
|
||||
* being passed to this class and the searcher)
|
||||
* @param reader used to compute IDF which can be used to a) score selected
|
||||
* fragments better b) use graded highlights eg set font color
|
||||
* intensity
|
||||
* @param fieldName the field on which Inverse Document Frequency (IDF)
|
||||
* calculations are based
|
||||
*/
|
||||
public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
|
||||
this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
|
||||
}
|
||||
|
||||
public QueryTermScorer(WeightedTerm[] weightedTerms) {
|
||||
termsToFind = new HashMap();
|
||||
for (int i = 0; i < weightedTerms.length; i++) {
|
||||
WeightedTerm existingTerm = (WeightedTerm) termsToFind
|
||||
.get(weightedTerms[i].term);
|
||||
if ((existingTerm == null)
|
||||
|| (existingTerm.weight < weightedTerms[i].weight)) {
|
||||
// if a term is defined more than once, always use the highest scoring
|
||||
// weight
|
||||
termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
|
||||
maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
|
||||
*/
|
||||
public TokenStream init(TokenStream tokenStream) {
|
||||
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
|
||||
* .lucene.search.highlight.TextFragment)
|
||||
*/
|
||||
public void startFragment(TextFragment newFragment) {
|
||||
uniqueTermsInFragment = new HashSet();
|
||||
currentTextFragment = newFragment;
|
||||
totalScore = 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
|
||||
*/
|
||||
public float getTokenScore() {
|
||||
String termText = termAtt.term();
|
||||
|
||||
WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
|
||||
if (queryTerm == null) {
|
||||
// not a query term - return
|
||||
return 0;
|
||||
}
|
||||
// found a query term - is it unique in this doc?
|
||||
if (!uniqueTermsInFragment.contains(termText)) {
|
||||
totalScore += queryTerm.getWeight();
|
||||
uniqueTermsInFragment.add(termText);
|
||||
}
|
||||
return queryTerm.getWeight();
|
||||
}
|
||||
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
|
||||
*/
|
||||
public float getFragmentScore() {
|
||||
return totalScore;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
|
||||
*/
|
||||
public void allFragmentsProcessed() {
|
||||
// this class has no special operations to perform at end of processing
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return The highest weighted term (useful for passing to GradientFormatter
|
||||
* to set top end of coloring scale.
|
||||
*/
|
||||
public float getMaxTermWeight() {
|
||||
return maxTermWeight;
|
||||
}
|
||||
}
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.search.highlight;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
|
@ -30,8 +32,9 @@ public interface Scorer {
|
|||
* getTokenScore().
|
||||
*
|
||||
* @param tokenStream
|
||||
* @throws IOException
|
||||
*/
|
||||
public void init(TokenStream tokenStream);
|
||||
public TokenStream init(TokenStream tokenStream) throws IOException;
|
||||
|
||||
/**
|
||||
* called when a new fragment is started for consideration
|
||||
|
|
|
@ -34,7 +34,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
private int fragmentSize;
|
||||
private int currentNumFrags;
|
||||
private int position = -1;
|
||||
private SpanScorer spanScorer;
|
||||
private QueryScorer queryScorer;
|
||||
private int waitForPos = -1;
|
||||
private int textSize;
|
||||
private TermAttribute termAtt;
|
||||
|
@ -42,19 +42,19 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
private OffsetAttribute offsetAtt;
|
||||
|
||||
/**
|
||||
* @param spanscorer SpanScorer that was used to score hits
|
||||
* @param queryScorer QueryScorer that was used to score hits
|
||||
*/
|
||||
public SimpleSpanFragmenter(SpanScorer spanscorer) {
|
||||
this(spanscorer, DEFAULT_FRAGMENT_SIZE);
|
||||
public SimpleSpanFragmenter(QueryScorer queryScorer) {
|
||||
this(queryScorer, DEFAULT_FRAGMENT_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param spanscorer SpanScorer that was used to score hits
|
||||
* @param queryScorer QueryScorer that was used to score hits
|
||||
* @param fragmentSize size in bytes of each fragment
|
||||
*/
|
||||
public SimpleSpanFragmenter(SpanScorer spanscorer, int fragmentSize) {
|
||||
public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
|
||||
this.fragmentSize = fragmentSize;
|
||||
this.spanScorer = spanscorer;
|
||||
this.queryScorer = queryScorer;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -69,7 +69,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
return false;
|
||||
}
|
||||
|
||||
WeightedSpanTerm wSpanTerm = spanScorer.getWeightedSpanTerm(termAtt.term());
|
||||
WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
|
||||
|
||||
if (wSpanTerm != null) {
|
||||
List positionSpans = wSpanTerm.getPositionSpans();
|
||||
|
|
|
@ -1,288 +0,0 @@
|
|||
package org.apache.lucene.search.highlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
|
||||
/**
|
||||
* {@link Scorer} implementation which scores text fragments by the number of
|
||||
* unique query terms found. This class converts appropriate Querys to
|
||||
* SpanQuerys and attempts to score only those terms that participated in
|
||||
* generating the 'hit' on the document.
|
||||
*/
|
||||
public class SpanScorer implements Scorer {
|
||||
private float totalScore;
|
||||
private Set foundTerms;
|
||||
private Map fieldWeightedSpanTerms;
|
||||
private float maxTermWeight;
|
||||
private int position = -1;
|
||||
private String defaultField;
|
||||
private TermAttribute termAtt;
|
||||
private PositionIncrementAttribute posIncAtt;
|
||||
private static boolean highlightCnstScrRngQuery;
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* Query to use for highlighting
|
||||
* @param field
|
||||
* Field to highlight - pass null to ignore fields
|
||||
* @param tokenStream
|
||||
* of source text to be highlighted
|
||||
* @throws IOException
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter) throws IOException {
|
||||
init(query, field, cachingTokenFilter, null, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* Query to use for highlighting
|
||||
* @param field
|
||||
* Field to highlight - pass null to ignore fields
|
||||
* @param tokenStream
|
||||
* of source text to be highlighted
|
||||
* @param expandMultiTermQuery
|
||||
* rewrite multi-term queries against a single doc memory index to
|
||||
* create boolean queries
|
||||
* @throws IOException
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, boolean expandMultiTermQuery) throws IOException {
|
||||
init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* Query to use for highlighting
|
||||
* @param field
|
||||
* Field to highlight - pass null to ignore fields
|
||||
* @param tokenStream
|
||||
* of source text to be highlighted
|
||||
* @param reader
|
||||
* @throws IOException
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, IndexReader reader)
|
||||
throws IOException {
|
||||
init(query, field, cachingTokenFilter, reader, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* Query to use for highlighting
|
||||
* @param field
|
||||
* Field to highlight - pass null to ignore fields
|
||||
* @param tokenStream
|
||||
* of source text to be highlighted
|
||||
* @param reader
|
||||
* @param expandMultiTermQuery
|
||||
* rewrite multi-term queries against a single doc memory index to
|
||||
* create boolean queries
|
||||
* @throws IOException
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
|
||||
throws IOException {
|
||||
init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* As above, but with ability to pass in an <tt>IndexReader</tt>
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField)
|
||||
throws IOException {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, cachingTokenFilter, reader, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* As above, but with ability to pass in an <tt>IndexReader</tt>
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField, boolean expandMultiTermQuery)
|
||||
throws IOException {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param defaultField - The default field for queries with the field name unspecified
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, String defaultField) throws IOException {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, cachingTokenFilter, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param defaultField - The default field for queries with the field name unspecified
|
||||
*/
|
||||
public SpanScorer(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, String defaultField, boolean expandMultiTermQuery) throws IOException {
|
||||
this.defaultField = defaultField.intern();
|
||||
init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param weightedTerms
|
||||
*/
|
||||
public SpanScorer(WeightedSpanTerm[] weightedTerms) {
|
||||
this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
|
||||
|
||||
for (int i = 0; i < weightedTerms.length; i++) {
|
||||
WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
|
||||
|
||||
if ((existingTerm == null) ||
|
||||
(existingTerm.weight < weightedTerms[i].weight)) {
|
||||
// if a term is defined more than once, always use the highest
|
||||
// scoring weight
|
||||
fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
|
||||
maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
|
||||
*/
|
||||
public float getFragmentScore() {
|
||||
return totalScore;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return The highest weighted term (useful for passing to
|
||||
* GradientFormatter to set top end of coloring scale.
|
||||
*/
|
||||
public float getMaxTermWeight() {
|
||||
return maxTermWeight;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
|
||||
* int)
|
||||
*/
|
||||
public float getTokenScore() {
|
||||
position += posIncAtt.getPositionIncrement();
|
||||
String termText = termAtt.term();
|
||||
|
||||
WeightedSpanTerm weightedSpanTerm;
|
||||
|
||||
if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
|
||||
termText)) == null) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (weightedSpanTerm.positionSensitive &&
|
||||
!weightedSpanTerm.checkPosition(position)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
float score = weightedSpanTerm.getWeight();
|
||||
|
||||
// found a query term - is it unique in this doc?
|
||||
if (!foundTerms.contains(termText)) {
|
||||
totalScore += score;
|
||||
foundTerms.add(termText);
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
public void init(TokenStream tokenStream) {
|
||||
termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
|
||||
posIncAtt = (PositionIncrementAttribute) tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the WeightedSpanTerm for the specified token. Useful for passing
|
||||
* Span information to a Fragmenter.
|
||||
*
|
||||
* @param token
|
||||
* @return WeightedSpanTerm for token
|
||||
*/
|
||||
public WeightedSpanTerm getWeightedSpanTerm(String token) {
|
||||
return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param query
|
||||
* @param field
|
||||
* @param tokenStream
|
||||
* @param reader
|
||||
* @throws IOException
|
||||
*/
|
||||
private void init(Query query, String field,
|
||||
CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
|
||||
throws IOException {
|
||||
WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
|
||||
: new WeightedSpanTermExtractor(defaultField);
|
||||
|
||||
qse.setHighlightCnstScrRngQuery(highlightCnstScrRngQuery);
|
||||
qse.setExpandMultiTermQuery(expandMultiTermQuery);
|
||||
if (reader == null) {
|
||||
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
|
||||
cachingTokenFilter, field);
|
||||
} else {
|
||||
this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
|
||||
cachingTokenFilter, field, reader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return whether ConstantScoreRangeQuerys are set to be highlighted
|
||||
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
|
||||
* constructor option to expand MultiTerm queries.
|
||||
*/
|
||||
public static boolean isHighlightCnstScrRngQuery() {
|
||||
return highlightCnstScrRngQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
* If you call Highlighter#getBestFragment() more than once you must reset
|
||||
* the SpanScorer between each call.
|
||||
*/
|
||||
public void reset() {
|
||||
position = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns highlighting of ConstantScoreRangeQuery on/off.
|
||||
* ConstantScoreRangeQuerys cannot be highlighted if you rewrite the query
|
||||
* first. Must be called before SpanScorer construction.
|
||||
*
|
||||
* @param highlight
|
||||
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
|
||||
* constructor option to expand MultiTerm queries.
|
||||
*/
|
||||
public static void setHighlightCnstScrRngQuery(boolean highlight) {
|
||||
highlightCnstScrRngQuery = highlight;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
|
||||
*/
|
||||
public void startFragment(TextFragment newFragment) {
|
||||
foundTerms = new HashSet();
|
||||
totalScore = 0;
|
||||
}
|
||||
}
|
|
@ -57,11 +57,11 @@ import org.apache.lucene.search.spans.Spans;
|
|||
public class WeightedSpanTermExtractor {
|
||||
|
||||
private String fieldName;
|
||||
private CachingTokenFilter cachedTokenFilter;
|
||||
private TokenStream tokenStream;
|
||||
private Map readers = new HashMap(10); // Map<String, IndexReader>
|
||||
private String defaultField;
|
||||
private boolean highlightCnstScrRngQuery;
|
||||
private boolean expandMultiTermQuery;
|
||||
private boolean cachedTokenStream;
|
||||
|
||||
public WeightedSpanTermExtractor() {
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ public class WeightedSpanTermExtractor {
|
|||
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
|
||||
extract((Query) iterator.next(), terms);
|
||||
}
|
||||
} else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
|
||||
} else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
|
||||
MultiTermQuery mtq = ((MultiTermQuery)query);
|
||||
if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
|
||||
mtq = copyMultiTermQuery(mtq);
|
||||
|
@ -240,8 +240,7 @@ public class WeightedSpanTermExtractor {
|
|||
while (spans.next()) {
|
||||
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
|
||||
}
|
||||
|
||||
cachedTokenFilter.reset();
|
||||
|
||||
}
|
||||
|
||||
if (spanPositions.size() == 0) {
|
||||
|
@ -301,15 +300,21 @@ public class WeightedSpanTermExtractor {
|
|||
return rv;
|
||||
}
|
||||
|
||||
private IndexReader getReaderForField(String field) {
|
||||
private IndexReader getReaderForField(String field) throws IOException {
|
||||
if(!cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
|
||||
tokenStream = new CachingTokenFilter(tokenStream);
|
||||
cachedTokenStream = true;
|
||||
}
|
||||
IndexReader reader = (IndexReader) readers.get(field);
|
||||
if (reader == null) {
|
||||
MemoryIndex indexer = new MemoryIndex();
|
||||
indexer.addField(field, cachedTokenFilter);
|
||||
indexer.addField(field, tokenStream);
|
||||
tokenStream.reset();
|
||||
IndexSearcher searcher = indexer.createSearcher();
|
||||
reader = searcher.getIndexReader();
|
||||
readers.put(field, reader);
|
||||
}
|
||||
|
||||
return reader;
|
||||
}
|
||||
|
||||
|
@ -328,7 +333,7 @@ public class WeightedSpanTermExtractor {
|
|||
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter)
|
||||
throws IOException {
|
||||
this.fieldName = null;
|
||||
this.cachedTokenFilter = cachingTokenFilter;
|
||||
this.tokenStream = cachingTokenFilter;
|
||||
|
||||
Map terms = new PositionCheckingMap();
|
||||
try {
|
||||
|
@ -354,14 +359,14 @@ public class WeightedSpanTermExtractor {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public Map getWeightedSpanTerms(Query query, CachingTokenFilter cachingTokenFilter,
|
||||
public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
|
||||
String fieldName) throws IOException {
|
||||
if (fieldName != null) {
|
||||
this.fieldName = fieldName.intern();
|
||||
}
|
||||
|
||||
Map terms = new PositionCheckingMap();
|
||||
this.cachedTokenFilter = cachingTokenFilter;
|
||||
this.tokenStream = tokenStream;
|
||||
try {
|
||||
extract(query, terms);
|
||||
} finally {
|
||||
|
@ -391,7 +396,7 @@ public class WeightedSpanTermExtractor {
|
|||
public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
|
||||
IndexReader reader) throws IOException {
|
||||
this.fieldName = fieldName;
|
||||
this.cachedTokenFilter = new CachingTokenFilter(tokenStream);
|
||||
this.tokenStream = tokenStream;
|
||||
|
||||
Map terms = new PositionCheckingMap();
|
||||
extract(query, terms);
|
||||
|
@ -419,23 +424,6 @@ public class WeightedSpanTermExtractor {
|
|||
|
||||
return terms;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use
|
||||
* getExpandMultiTermQuery instead.
|
||||
*/
|
||||
public boolean isHighlightCnstScrRngQuery() {
|
||||
return highlightCnstScrRngQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param highlightCnstScrRngQuery
|
||||
* @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
|
||||
* setExpandMultiTermQuery option.
|
||||
*/
|
||||
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
|
||||
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
* This class makes sure that if both position sensitive and insensitive
|
||||
|
@ -495,4 +483,12 @@ public class WeightedSpanTermExtractor {
|
|||
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
|
||||
this.expandMultiTermQuery = expandMultiTermQuery;
|
||||
}
|
||||
|
||||
public boolean isCachedTokenStream() {
|
||||
return cachedTokenStream;
|
||||
}
|
||||
|
||||
public TokenStream getTokenStream() {
|
||||
return tokenStream;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ The highlight package contains classes to provide "keyword in context" features
|
|||
typically used to highlight search terms in the text of results pages.
|
||||
The Highlighter class is the central component and can be used to extract the
|
||||
most interesting sections of a piece of text and highlight them, with the help of
|
||||
Fragmenter, FragmentScorer, Formatter classes.
|
||||
Fragmenter, fragment Scorer, and Formatter classes.
|
||||
|
||||
<h2>Example Usage</h2>
|
||||
|
||||
|
@ -14,14 +14,16 @@ Fragmenter, FragmentScorer, Formatter classes.
|
|||
IndexSearcher searcher = new IndexSearcher(directory);
|
||||
QueryParser parser = new QueryParser("notv", analyzer);
|
||||
Query query = parser.parse("million");
|
||||
//query = query.rewrite(reader); //required to expand search terms
|
||||
Hits hits = searcher.search(query);
|
||||
|
||||
TopDocs hits = searcher.search(query, 10);
|
||||
|
||||
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
|
||||
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
String text = hits.doc(i).get("notv");
|
||||
TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "notv", analyzer);
|
||||
int id = hits.scoreDocs[i].doc;
|
||||
Document doc = searcher.doc(id);
|
||||
String text = doc.get("notv");
|
||||
TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "notv", analyzer);
|
||||
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
|
||||
for (int j = 0; j < frag.length; j++) {
|
||||
if ((frag[j] != null) && (frag[j].getScore() > 0)) {
|
||||
|
@ -29,8 +31,8 @@ Fragmenter, FragmentScorer, Formatter classes.
|
|||
}
|
||||
}
|
||||
//Term vector
|
||||
text = hits.doc(i).get("tv");
|
||||
tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.id(i), "tv", analyzer);
|
||||
text = doc.get("tv");
|
||||
tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.scoreDocs[i].doc, "tv", analyzer);
|
||||
frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
|
||||
for (int j = 0; j < frag.length; j++) {
|
||||
if ((frag[j] != null) && (frag[j].getScore() > 0)) {
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
<html>
|
||||
<body>
|
||||
<p>
|
||||
The spanscorer classes provide the Highlighter with the ability
|
||||
to only highlight the Tokens that contributed to a query match.
|
||||
The SpanScorer class is the central component and it will attempt to score Terms
|
||||
based on whether they actually participated in scoring the Query.
|
||||
</p>
|
||||
<p>
|
||||
The implementation is very similar to QueryScorer in that WeightedSpanTerms are extracted
|
||||
from the given Query and then placed in a Map. During Token scoring, Terms found in
|
||||
the Map return a score equal to their weight. The added wrinkle is that when terms are
|
||||
extracted, the sub-queries that make up the Query are converted to SpanQuery's and
|
||||
SpanQuery.getSpans() is applied to a MemoryIndex containing the TokenStream of the text to
|
||||
be highlighted if the sub-query is position sensitive. The start and end positions of the
|
||||
matching Spans are recorded with the respective WeightedSpanTerms and these positions are
|
||||
then used to filter possible Token matches during scoring.
|
||||
</p>
|
||||
<p>
|
||||
Unlike the QueryScorer, you do not want to rewrite the query first with the SpanScorer for
|
||||
multi-term query handling, i.e. wildcard, fuzzy, range.
|
||||
The SpanScorer constructors provide an option to enable the highlighting of multi-term queries.
|
||||
If this option is enabled, the SpanScorer will rewrite the query against a single doc index
|
||||
containing the doc to be highlighted, rather than against the full index. If you do rewrite the
|
||||
query first, certain multi-term queries may not highlight correctly.
|
||||
</p>
|
||||
<h2>Example Usage</h2>
|
||||
|
||||
<pre>
|
||||
IndexSearcher searcher = new IndexSearcher(ramDir);
|
||||
Query query = QueryParser.parse("Kenne*", FIELD_NAME, analyzer);
|
||||
Hits hits = searcher.search(query);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++)
|
||||
{
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(
|
||||
FIELD_NAME, new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream, true));
|
||||
tokenStream.reset();
|
||||
|
||||
// Get 3 best fragments and separate with a "..."
|
||||
String result = highlighter.getBestFragments(tokenStream, text, 3, "...");
|
||||
System.out.println(result);
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
If you make a call to any of the getBestFragments() methods more than once, you must call reset() on the SpanScorer
|
||||
between each call.
|
||||
</p>
|
||||
|
||||
<p>The SpanScorer class has a constructor which can use an IndexReader to derive the IDF (inverse document frequency)
|
||||
for each term in order to influence the score. This is useful for helping to extract the most significant sections
|
||||
of a document and in supplying scores used by the GradientFormatter to color significant words more strongly.
|
||||
The SpanScorer.getMaxTermWeight method is useful when passed to the GradientFormatter constructor to define the top score
|
||||
which is associated with the top color.</p>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -118,8 +118,9 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
query = qp.parse("\"very long\"");
|
||||
searcher = new IndexSearcher(ramDir, false);
|
||||
TopDocs hits = searcher.search(query, 10);
|
||||
|
||||
Highlighter highlighter = new Highlighter(null);
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(scorer);
|
||||
|
||||
|
||||
for (int i = 0; i < hits.scoreDocs.length; i++) {
|
||||
|
@ -128,14 +129,12 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
TokenStream stream = TokenSources.getAnyTokenStream(searcher
|
||||
.getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
|
||||
CachingTokenFilter ctf = new CachingTokenFilter(stream);
|
||||
SpanScorer scorer = new SpanScorer(query, FIELD_NAME, ctf);
|
||||
// ctf.reset();
|
||||
|
||||
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
|
||||
highlighter.setFragmentScorer(scorer);
|
||||
|
||||
highlighter.setTextFragmenter(fragmenter);
|
||||
|
||||
String fragment = highlighter.getBestFragment(ctf, storedField);
|
||||
String fragment = highlighter.getBestFragment(stream, storedField);
|
||||
|
||||
System.out.println(fragment);
|
||||
}
|
||||
|
@ -181,10 +180,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
fieldName, new StringReader(text)));
|
||||
// Assuming "<B>", "</B>" used to highlight
|
||||
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
|
||||
Highlighter highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName,
|
||||
tokenStream, FIELD_NAME));
|
||||
QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(formatter, scorer);
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
|
||||
tokenStream.reset();
|
||||
|
||||
String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
|
||||
return rv.length() == 0 ? text : rv;
|
||||
}
|
||||
|
@ -194,13 +193,14 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(scorer);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -225,9 +225,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
Analyzer analyzer = new WhitespaceAnalyzer();
|
||||
QueryParser qp = new QueryParser(f1, analyzer);
|
||||
Query query = qp.parse(q);
|
||||
CachingTokenFilter stream = new CachingTokenFilter(analyzer.tokenStream(f1,
|
||||
new StringReader(content)));
|
||||
Scorer scorer = new SpanScorer(query, f1, stream, false);
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, f1);
|
||||
scorer.setExpandMultiTermQuery(false);
|
||||
|
||||
Highlighter h = new Highlighter(this, scorer);
|
||||
|
||||
h.getBestFragment(analyzer, f1, content);
|
||||
|
@ -241,14 +242,14 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -264,14 +265,13 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this,scorer);
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -291,10 +291,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -310,14 +310,15 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
|
||||
.tokenStream(FIELD_NAME, new StringReader(text)));
|
||||
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
|
||||
Highlighter highlighter = new Highlighter(this, spanscorer);
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 5));
|
||||
tokenStream.reset();
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text,
|
||||
maxNumFragmentsRequired, "...");
|
||||
|
@ -328,15 +329,16 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
doSearching("\"been shot\"");
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
scorer = new QueryScorer(query, FIELD_NAME);
|
||||
highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
|
||||
.tokenStream(FIELD_NAME, new StringReader(text)));
|
||||
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
|
||||
Highlighter highlighter = new Highlighter(this, spanscorer);
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 20));
|
||||
tokenStream.reset();
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text,
|
||||
maxNumFragmentsRequired, "...");
|
||||
|
@ -350,15 +352,16 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
doSearching("y \"x y z\"");
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this,scorer);
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -421,7 +424,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
public void testSimpleHighlighter() throws Exception {
|
||||
doSearching("Kennedy");
|
||||
Highlighter highlighter = new Highlighter(new QueryScorer(query));
|
||||
Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
int maxNumFragmentsRequired = 2;
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
|
@ -579,18 +582,15 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
|
||||
int maxNumFragmentsRequired = 2;
|
||||
String fragmentSeparator = "...";
|
||||
SpanScorer scorer = null;
|
||||
QueryScorer scorer = null;
|
||||
TokenStream tokenStream = null;
|
||||
|
||||
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
|
||||
SpanScorer.setHighlightCnstScrRngQuery(true);
|
||||
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream);
|
||||
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
|
||||
|
||||
((CachingTokenFilter) tokenStream).reset();
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
|
||||
|
@ -619,18 +619,16 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
|
||||
int maxNumFragmentsRequired = 2;
|
||||
String fragmentSeparator = "...";
|
||||
SpanScorer scorer = null;
|
||||
QueryScorer scorer = null;
|
||||
TokenStream tokenStream = null;
|
||||
|
||||
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
|
||||
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream, true);
|
||||
scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
|
||||
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
((CachingTokenFilter) tokenStream).reset();
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
|
@ -650,18 +648,16 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
|
||||
int maxNumFragmentsRequired = 2;
|
||||
String fragmentSeparator = "...";
|
||||
SpanScorer scorer = null;
|
||||
QueryScorer scorer = null;
|
||||
TokenStream tokenStream = null;
|
||||
|
||||
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
|
||||
scorer = new SpanScorer(query, null, (CachingTokenFilter) tokenStream, true);
|
||||
scorer = new QueryScorer(query, null);
|
||||
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
((CachingTokenFilter) tokenStream).reset();
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
|
@ -681,18 +677,16 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
|
||||
int maxNumFragmentsRequired = 2;
|
||||
String fragmentSeparator = "...";
|
||||
SpanScorer scorer = null;
|
||||
QueryScorer scorer = null;
|
||||
TokenStream tokenStream = null;
|
||||
|
||||
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
|
||||
scorer = new SpanScorer(query, "random_field", (CachingTokenFilter) tokenStream, HighlighterTest.FIELD_NAME, true);
|
||||
scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
|
||||
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
((CachingTokenFilter) tokenStream).reset();
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
|
@ -744,7 +738,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
public void run() throws Exception {
|
||||
TermQuery query = new TermQuery(new Term("data", "help"));
|
||||
Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
|
||||
Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
|
||||
hg.setTextFragmenter(new NullFragmenter());
|
||||
|
||||
String match = null;
|
||||
|
@ -900,7 +894,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
|
||||
// Highlighter(new
|
||||
// QueryScorer(wTerms));
|
||||
// QueryTermScorer(wTerms));
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(2));
|
||||
|
||||
|
@ -965,7 +959,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
doSearching("Kennedy");
|
||||
// new Highlighter(HighlighterTest.this, new QueryScorer(query));
|
||||
// new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
|
@ -995,7 +989,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
||||
HighlighterTest.this);// new Highlighter(this, new
|
||||
// QueryScorer(query));
|
||||
// QueryTermScorer(query));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
|
||||
|
||||
|
@ -1027,7 +1021,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
|
||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
||||
HighlighterTest.this);// new Highlighter(this, new
|
||||
// QueryScorer(query));
|
||||
// QueryTermScorer(query));
|
||||
highlighter.setMaxDocBytesToAnalyze(30);
|
||||
|
||||
highlighter.getBestFragment(tokenStream, texts[0]);
|
||||
|
@ -1062,7 +1056,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream(
|
||||
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
|
||||
// new
|
||||
// QueryScorer(query));
|
||||
// QueryTermScorer(query));
|
||||
hg.setTextFragmenter(new NullFragmenter());
|
||||
hg.setMaxDocBytesToAnalyze(100);
|
||||
match = hg.getBestFragment(new StandardAnalyzer(stopWords), "data", sb.toString());
|
||||
|
@ -1114,7 +1108,6 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
public void run() throws Exception {
|
||||
numHighlights = 0;
|
||||
SpanScorer.setHighlightCnstScrRngQuery(false);
|
||||
// test to show how rewritten query can still be used
|
||||
searcher = new IndexSearcher(ramDir);
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
|
@ -1136,12 +1129,14 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
|
||||
HighlighterTest.this);
|
||||
TokenStream tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME, new StringReader(text)));
|
||||
Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
|
||||
String highlightedText = highlighter.getBestFragments(tokenStream, text,
|
||||
maxNumFragmentsRequired, "...");
|
||||
|
||||
System.out.println(highlightedText);
|
||||
}
|
||||
// We expect to have zero highlights if the query is multi-terms and is
|
||||
|
@ -1198,8 +1193,8 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
return 1;
|
||||
}
|
||||
|
||||
public void init(TokenStream tokenStream) {
|
||||
|
||||
public TokenStream init(TokenStream tokenStream) {
|
||||
return null;
|
||||
}
|
||||
});
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(2000));
|
||||
|
@ -1266,7 +1261,7 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
|
||||
// create an instance of the highlighter with the tags used to surround
|
||||
// highlighted text
|
||||
Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
|
||||
Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
|
@ -1293,9 +1288,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
if (mode == this.SPAN) {
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
|
||||
CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
|
||||
fieldSpecificScorer = new SpanScorer(query, FIELD_NAME, ctf);
|
||||
fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
|
||||
|
||||
} else if (mode == this.STANDARD) {
|
||||
fieldSpecificScorer = new QueryScorer(query, "contents");
|
||||
fieldSpecificScorer = new QueryTermScorer(query, "contents");
|
||||
}
|
||||
Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
|
||||
fieldSpecificScorer);
|
||||
|
@ -1308,9 +1304,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
if (mode == this.SPAN) {
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(docMainText));
|
||||
CachingTokenFilter ctf = new CachingTokenFilter(tokenStream);
|
||||
fieldInSpecificScorer = new SpanScorer(query, null, ctf);
|
||||
fieldInSpecificScorer = new QueryScorer(query, null);
|
||||
|
||||
} else if (mode == this.STANDARD) {
|
||||
fieldInSpecificScorer = new QueryScorer(query);
|
||||
fieldInSpecificScorer = new QueryTermScorer(query);
|
||||
}
|
||||
|
||||
Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
|
||||
|
@ -1535,9 +1532,9 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
Query query = parser.parse( q );
|
||||
IndexSearcher searcher = new IndexSearcher( dir );
|
||||
// This scorer can return negative idf -> null fragment
|
||||
Scorer scorer = new QueryScorer( query, searcher.getIndexReader(), "t_text1" );
|
||||
Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
|
||||
// This scorer doesn't use idf (patch version)
|
||||
//Scorer scorer = new QueryScorer( query, "t_text1" );
|
||||
//Scorer scorer = new QueryTermScorer( query, "t_text1" );
|
||||
Highlighter h = new Highlighter( scorer );
|
||||
|
||||
TopDocs hits = searcher.search(query, null, 10);
|
||||
|
@ -1606,10 +1603,10 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
Highlighter highlighter = new Highlighter(this,
|
||||
new SpanScorer(query, FIELD_NAME, tokenStream));
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
|
@ -1763,34 +1760,34 @@ class SynonymTokenizer extends TokenStream {
|
|||
static final int SPAN = 1;
|
||||
int mode = STANDARD;
|
||||
Fragmenter frag = new SimpleFragmenter(20);
|
||||
|
||||
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream,
|
||||
Formatter formatter) {
|
||||
|
||||
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
|
||||
return getHighlighter(query, fieldName, stream, formatter, true);
|
||||
}
|
||||
|
||||
public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
|
||||
Scorer scorer = null;
|
||||
if (mode == STANDARD) {
|
||||
return new Highlighter(formatter, new QueryScorer(query));
|
||||
scorer = new QueryTermScorer(query);
|
||||
} else if (mode == SPAN) {
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(stream);
|
||||
Highlighter highlighter;
|
||||
try {
|
||||
highlighter = new Highlighter(formatter, new SpanScorer(query, fieldName, tokenStream));
|
||||
tokenStream.reset();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
scorer = new QueryScorer(query, fieldName);
|
||||
if(!expanMultiTerm) {
|
||||
((QueryScorer)scorer).setExpandMultiTermQuery(false);
|
||||
}
|
||||
|
||||
return highlighter;
|
||||
} else {
|
||||
throw new RuntimeException("Unknown highlight mode");
|
||||
}
|
||||
|
||||
return new Highlighter(formatter, scorer);
|
||||
}
|
||||
|
||||
Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
|
||||
if (mode == STANDARD) {
|
||||
return new Highlighter(formatter, new QueryScorer(weightedTerms));
|
||||
return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
|
||||
} else if (mode == SPAN) {
|
||||
Highlighter highlighter;
|
||||
|
||||
highlighter = new Highlighter(formatter, new SpanScorer((WeightedSpanTerm[]) weightedTerms));
|
||||
highlighter = new Highlighter(formatter, new QueryScorer((WeightedSpanTerm[]) weightedTerms));
|
||||
|
||||
return highlighter;
|
||||
} else {
|
||||
|
@ -1815,16 +1812,14 @@ class SynonymTokenizer extends TokenStream {
|
|||
if (mode == SPAN) {
|
||||
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
|
||||
new StringReader(text)));
|
||||
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME,
|
||||
(CachingTokenFilter) tokenStream, expandMT);
|
||||
scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
|
||||
|
||||
} else if (mode == STANDARD) {
|
||||
scorer = new QueryScorer(query);
|
||||
scorer = new QueryTermScorer(query);
|
||||
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
|
||||
}
|
||||
Highlighter highlighter = new Highlighter(formatter, scorer);
|
||||
if (mode == SPAN) {
|
||||
((CachingTokenFilter) tokenStream).reset();
|
||||
}
|
||||
|
||||
highlighter.setTextFragmenter(frag);
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
|
|
Loading…
Reference in New Issue