mirror of https://github.com/apache/lucene.git
- moved highlighting-specific classes to HighlightingUtils.java
- brace consistency in HighlightingUtils.java
- removed unused final statics in SolrParams (should highlighting param defs be moved here?)

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@510338 13f79535-47bb-0310-9956-ffa450edef68

parent 3b968a0bc9
commit 2ca3943e78
SolrParams.java:

@@ -60,14 +60,6 @@ public abstract class SolrParams {
   public static final String DEBUG_QUERY = "debugQuery";
   /** another query to explain against */
   public static final String EXPLAIN_OTHER = "explainOther";
-  /** wether to highlight */
-  public static final String HIGHLIGHT = "highlight";
-  /** fields to highlight */
-  public static final String HIGHLIGHT_FIELDS = "highlightFields";
-  /** maximum highlight fragments to return */
-  public static final String MAX_SNIPPETS = "maxSnippets";
-  /** override default highlight Formatter class */
-  public static final String HIGHLIGHT_FORMATTER_CLASS = "highlightFormatterClass";
 
   /**
    * Should facet counts be calculated?
HighlightingUtils.java:

@@ -18,11 +18,14 @@ package org.apache.solr.util;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.ArrayList;
+import java.util.ListIterator;
 
 import org.apache.solr.request.*;
 import org.apache.solr.search.DocIterator;

@@ -30,7 +33,7 @@ import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.schema.SchemaField;
 
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.highlight.*;

@@ -38,8 +41,7 @@ import org.apache.lucene.search.highlight.*;
 /**
  * Collection of Utility and Factory methods for Highlighting.
  */
-public class HighlightingUtils
-{
+public class HighlightingUtils {
   private static final String SIMPLE = "simple";
 
   private static final String HIGHLIGHT = "hl";

@@ -53,8 +55,7 @@ public class HighlightingUtils
   private static final String FIELD_MATCH = PREFIX+"requireFieldMatch";
 
   private static SolrParams DEFAULTS = null;
-  static
-  {
+  static {
     Map<String,String> map = new HashMap<String,String>();
     map.put(SNIPPETS, "1");
     map.put(FRAGSIZE, "100");

@@ -66,8 +67,7 @@ public class HighlightingUtils
   }
 
   /** Combine request parameters with highlighting defaults. */
-  private static SolrParams getParams(SolrQueryRequest request)
-  {
+  private static SolrParams getParams(SolrQueryRequest request) {
     return new DefaultSolrParams(request.getParams(), DEFAULTS);
   }
 
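getParams() above layers the per-request parameters over the static DEFAULTS via DefaultSolrParams, so a request value wins and the handler defaults fill the gaps. A minimal sketch of that fallback behavior, using plain maps as stand-ins rather than the actual Solr classes:

```java
import java.util.HashMap;
import java.util.Map;

// Stand-in for the DefaultSolrParams(request.getParams(), DEFAULTS) pattern:
// consult the request's params first, then fall back to the defaults.
class LayeredParams {
  private final Map<String, String> params;   // per-request values
  private final Map<String, String> defaults; // static defaults (cf. DEFAULTS)

  LayeredParams(Map<String, String> params, Map<String, String> defaults) {
    this.params = params;
    this.defaults = defaults;
  }

  String get(String name) {
    String v = params.get(name);
    return v != null ? v : defaults.get(name);
  }

  public static void main(String[] args) {
    Map<String, String> defaults = new HashMap<String, String>();
    defaults.put("snippets", "1");   // mirrors map.put(SNIPPETS, "1")
    defaults.put("fragsize", "100"); // mirrors map.put(FRAGSIZE, "100")

    Map<String, String> request = new HashMap<String, String>();
    request.put("fragsize", "50");   // the request overrides this one

    LayeredParams p = new LayeredParams(request, defaults);
    System.out.println(p.get("fragsize")); // 50  (from the request)
    System.out.println(p.get("snippets")); // 1   (falls back to the default)
  }
}
```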
@@ -76,8 +76,7 @@ public class HighlightingUtils
    * @param request The current SolrQueryRequest
    * @return <code>true</code> if highlighting enabled, <code>false</code> if not.
    */
-  public static boolean isHighlightingEnabled(SolrQueryRequest request)
-  {
+  public static boolean isHighlightingEnabled(SolrQueryRequest request) {
     return getParams(request).getBool(HIGHLIGHT, false);
   }
 

@@ -87,8 +86,7 @@ public class HighlightingUtils
    * @param fieldName The name of the field
    * @param request The current SolrQueryRequest
    */
-  public static Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request)
-  {
+  public static Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
     Highlighter highlighter = new Highlighter(
         getFormatter(fieldName, request),
         getQueryScorer(query, fieldName, request));

@@ -102,15 +100,12 @@ public class HighlightingUtils
    * @param fieldName The name of the field
    * @param request The SolrQueryRequest
    */
-  public static QueryScorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request)
-  {
+  public static QueryScorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
     boolean reqFieldMatch = getParams(request).getFieldBool(fieldName, FIELD_MATCH, false);
-    if (reqFieldMatch)
-    {
+    if (reqFieldMatch) {
       return new QueryScorer(query, request.getSearcher().getReader(), fieldName);
     }
-    else
-    {
+    else {
       return new QueryScorer(query);
     }
   }

@@ -123,25 +118,20 @@ public class HighlightingUtils
    * @param request The current SolrQueryRequest
    * @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request.
    */
-  public static String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields)
-  {
+  public static String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) {
     String fields[] = getParams(request).getParams(FIELDS);
 
     // if no fields specified in the request, or the handler, fall back to programmatic default, or default search field.
-    if(emptyArray(fields))
-    {
+    if(emptyArray(fields)) {
       // use default search field if highlight fieldlist not specified.
-      if (emptyArray(defaultFields))
-      {
+      if (emptyArray(defaultFields)) {
         fields = new String[]{request.getSchema().getDefaultSearchFieldName()};
       }
-      else
-      {
+      else {
         fields = defaultFields;
       }
     }
-    else if (fields.length == 1)
-    {
+    else if (fields.length == 1) {
       // if there's a single request/handler value, it may be a space/comma separated list
       fields = SolrPluginUtils.split(fields[0]);
     }
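The single-value branch above relies on SolrPluginUtils.split() to break one parameter value into a field list. Assuming split() tokenizes on commas and/or whitespace (its exact semantics are not shown in this diff), a hypothetical equivalent:

```java
// Hypothetical stand-in for SolrPluginUtils.split(String): break a
// "space/comma separated list" into individual field names.
public class SplitDemo {
  static String[] split(String s) {
    return s.trim().split("[,\\s]+");
  }

  public static void main(String[] args) {
    for (String f : split("title, body  features")) {
      System.out.println(f); // title / body / features
    }
  }
}
```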
@@ -149,8 +139,7 @@ public class HighlightingUtils
     return fields;
   }
 
-  private static boolean emptyArray(String[] arr)
-  {
+  private static boolean emptyArray(String[] arr) {
     return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0);
   }
 

@@ -161,8 +150,7 @@ public class HighlightingUtils
    * @param fieldName The name of the field
    * @param request The current SolrQueryRequest
    */
-  public static int getMaxSnippets(String fieldName, SolrQueryRequest request)
-  {
+  public static int getMaxSnippets(String fieldName, SolrQueryRequest request) {
     return Integer.parseInt(getParams(request).getFieldParam(fieldName, SNIPPETS));
   }
 

@@ -175,8 +163,7 @@ public class HighlightingUtils
    * @param request The current SolrQueryRequest
    * @return An appropriate Formatter.
    */
-  public static Formatter getFormatter(String fieldName, SolrQueryRequest request)
-  {
+  public static Formatter getFormatter(String fieldName, SolrQueryRequest request) {
     SolrParams p = getParams(request);
 
     // SimpleHTMLFormatter is the only supported Formatter at the moment

@@ -192,8 +179,7 @@ public class HighlightingUtils
    * @param request The current SolrQueryRequest
    * @return An appropriate Fragmenter.
    */
-  public static Fragmenter getFragmenter(String fieldName, SolrQueryRequest request)
-  {
+  public static Fragmenter getFragmenter(String fieldName, SolrQueryRequest request) {
     int fragsize = Integer.parseInt(getParams(request).getFieldParam(fieldName, FRAGSIZE));
     return (fragsize <= 0) ? new NullFragmenter() : new GapFragmenter(fragsize);
   }

@@ -210,8 +196,7 @@ public class HighlightingUtils
    * @return NamedList containing a NamedList for each document, which in
    * turns contains sets (field, summary) pairs.
    */
-  public static NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException
-  {
+  public static NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
     if (!isHighlightingEnabled(req))
       return null;
 

@@ -232,13 +217,11 @@ public class HighlightingUtils
 
     // Highlight each document
     DocIterator iterator = docs.iterator();
-    for (int i = 0; i < docs.size(); i++)
-    {
+    for (int i = 0; i < docs.size(); i++) {
       int docId = iterator.nextDoc();
       Document doc = readDocs[i];
       NamedList docSummaries = new SimpleOrderedMap();
-      for (String fieldName : fieldNames)
-      {
+      for (String fieldName : fieldNames) {
        fieldName = fieldName.trim();
        String[] docTexts = doc.getValues(fieldName);
        if (docTexts == null) continue;

@@ -249,24 +232,20 @@ public class HighlightingUtils
 
       String[] summaries;
       TextFragment[] frag;
-      if (docTexts.length == 1)
-      {
+      if (docTexts.length == 1) {
        // single-valued field
        TokenStream tstream;
-        try
-        {
+        try {
          // attempt term vectors
          tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
        }
-        catch (IllegalArgumentException e)
-        {
+        catch (IllegalArgumentException e) {
          // fall back to analyzer
          tstream = new TokenOrderingFilter(searcher.getSchema().getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10);
        }
        frag = highlighter.getBestTextFragments(tstream, docTexts[0], false, numFragments);
      }
-      else
-      {
+      else {
        // multi-valued field
        MultiValueTokenStream tstream;
        tstream = new MultiValueTokenStream(fieldName, docTexts, searcher.getSchema().getAnalyzer(), true);

@@ -274,18 +253,16 @@ public class HighlightingUtils
      }
      // convert fragments back into text
      // TODO: we can include score and position information in output as snippet attributes
-      if (frag.length > 0)
-      {
+      if (frag.length > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
-        for (int j = 0; j < frag.length; j++)
-        {
-          if ((frag[j] != null) && (frag[j].getScore() > 0))
-          {
+        for (int j = 0; j < frag.length; j++) {
+          if ((frag[j] != null) && (frag[j].getScore() > 0)) {
            fragTexts.add(frag[j].toString());
          }
        }
        summaries = fragTexts.toArray(new String[0]);
-        if (summaries.length > 0) docSummaries.add(fieldName, summaries);
+        if (summaries.length > 0)
+          docSummaries.add(fieldName, summaries);
      }
    }
    String printId = searcher.getSchema().printableUniqueKey(doc);
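For orientation, doHighlighting() accumulates one entry per document, keyed by the schema's printable unique key and holding (field, summaries) pairs. A rough sketch of that shape, with ordered maps standing in for NamedList and SimpleOrderedMap (the "SP2514N" key is just an illustrative id):

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Approximate shape of the structure doHighlighting() returns, using
// LinkedHashMap as a stand-in for the ordered name/value pairs of NamedList.
public class ResultShapeDemo {
  public static void main(String[] args) {
    Map<String, Map<String, String[]>> fragments =
        new LinkedHashMap<String, Map<String, String[]>>();

    Map<String, String[]> docSummaries =
        new LinkedHashMap<String, String[]>();
    // one (field, summaries) pair per highlighted field of the document
    docSummaries.put("features", new String[] { "a <em>highlighted</em> snippet" });

    // keyed by printableUniqueKey(doc)
    fragments.put("SP2514N", docSummaries);

    System.out.println(fragments.keySet()); // [SP2514N]
  }
}
```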
@@ -294,3 +271,160 @@ public class HighlightingUtils
     return fragments;
   }
 }
+
+/**
+ * Helper class which creates a single TokenStream out of values from a
+ * multi-valued field.
+ */
+class MultiValueTokenStream extends TokenStream {
+  private String fieldName;
+  private String[] values;
+  private Analyzer analyzer;
+  private int curIndex;                  // next index into the values array
+  private int curOffset;                 // offset into concatenated string
+  private TokenStream currentStream;     // tokenStream currently being iterated
+  private boolean orderTokenOffsets;
+
+  /** Constructs a TokenStream for consecutively-analyzed field values
+   *
+   * @param fieldName name of the field
+   * @param values array of field data
+   * @param analyzer analyzer instance
+   */
+  public MultiValueTokenStream(String fieldName, String[] values,
+                               Analyzer analyzer, boolean orderTokenOffsets) {
+    this.fieldName = fieldName;
+    this.values = values;
+    this.analyzer = analyzer;
+    curIndex = -1;
+    curOffset = 0;
+    currentStream = null;
+    this.orderTokenOffsets = orderTokenOffsets;
+  }
+
+  /** Returns the next token in the stream, or null at EOS. */
+  public Token next() throws IOException {
+    int extra = 0;
+    if(currentStream == null) {
+      curIndex++;
+      if(curIndex < values.length) {
+        currentStream = analyzer.tokenStream(fieldName,
+                                             new StringReader(values[curIndex]));
+        if (orderTokenOffsets) currentStream = new TokenOrderingFilter(currentStream,10);
+        // add extra space between multiple values
+        if(curIndex > 0)
+          extra = analyzer.getPositionIncrementGap(fieldName);
+      } else {
+        return null;
+      }
+    }
+    Token nextToken = currentStream.next();
+    if(nextToken == null) {
+      curOffset += values[curIndex].length();
+      currentStream = null;
+      return next();
+    }
+    // create an modified token which is the offset into the concatenated
+    // string of all values
+    Token offsetToken = new Token(nextToken.termText(),
+                                  nextToken.startOffset() + curOffset,
+                                  nextToken.endOffset() + curOffset);
+    offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10);
+    return offsetToken;
+  }
+
+  /**
+   * Returns all values as a single String into which the Tokens index with
+   * their offsets.
+   */
+  public String asSingleValue() {
+    StringBuilder sb = new StringBuilder();
+    for(String str : values)
+      sb.append(str);
+    return sb.toString();
+  }
+
+}
+
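The bookkeeping in next() is easiest to see with concrete numbers: tokens from the i-th value keep their within-value offsets but are re-based by the combined length of all earlier values, so they index into the single concatenated string that asSingleValue() produces. A toy illustration using plain string scanning instead of Lucene types:

```java
// Toy illustration of MultiValueTokenStream's offset re-basing: curOffset
// grows by each exhausted value's length, and every token's start/end
// offsets are shifted by it.
public class OffsetDemo {
  public static void main(String[] args) {
    String[] values = { "red fish", "blue fish" };
    int curOffset = 0; // offset into the concatenated string
    for (String value : values) {
      int from = 0;
      for (String term : value.split(" ")) {
        int start = value.indexOf(term, from);
        int end = start + term.length();
        System.out.println(term + " -> [" + (start + curOffset) + "," + (end + curOffset) + ")");
        from = end;
      }
      curOffset += value.length(); // mirrors curOffset += values[curIndex].length()
    }
    // prints: red [0,3)  fish [4,8)  blue [8,12)  fish [13,17)
  }
}
```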
+/**
+ * A simple modification of SimpleFragmenter which additionally creates new
+ * fragments when an unusually-large position increment is encountered
+ * (this behaves much better in the presence of multi-valued fields).
+ */
+class GapFragmenter extends SimpleFragmenter {
+  /**
+   * When a gap in term positions is observed that is at least this big, treat
+   * the gap as a fragment delimiter.
+   */
+  public static final int INCREMENT_THRESHOLD = 50;
+  protected int fragOffsetAccum = 0;
+
+  public GapFragmenter() {
+  }
+
+  public GapFragmenter(int fragsize) {
+    super(fragsize);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
+   */
+  public void start(String originalText) {
+    fragOffsetAccum = 0;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
+   */
+  public boolean isNewFragment(Token token) {
+    boolean isNewFrag =
+      token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
+      token.getPositionIncrement() > INCREMENT_THRESHOLD;
+    if(isNewFrag) {
+      fragOffsetAccum += token.endOffset() - fragOffsetAccum;
+    }
+    return isNewFrag;
+  }
+}
+
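GapFragmenter's break rule combines two tests: the token's end offset has passed the accumulated fragment size, or its position increment exceeds INCREMENT_THRESHOLD (the signature of a multi-value gap, given the extra*10 increment added by MultiValueTokenStream). A small trace of that decision logic with made-up token data:

```java
// Trace of GapFragmenter.isNewFragment(): break when the token ends past
// fragOffsetAccum + fragment size, or when a large position-increment gap
// (a multi-value boundary) is seen.
public class GapFragmenterDemo {
  static final int INCREMENT_THRESHOLD = 50;
  static final int FRAGMENT_SIZE = 100;

  public static void main(String[] args) {
    // made-up tokens as { endOffset, positionIncrement } pairs
    int[][] tokens = { {10, 1}, {95, 1}, {120, 1}, {130, 60}, {140, 1} };
    int fragOffsetAccum = 0;
    for (int[] t : tokens) {
      boolean isNewFrag = t[0] >= fragOffsetAccum + FRAGMENT_SIZE
                       || t[1] > INCREMENT_THRESHOLD;
      if (isNewFrag) fragOffsetAccum = t[0]; // i.e. fragOffsetAccum += endOffset - fragOffsetAccum
      System.out.println("endOffset=" + t[0] + " incr=" + t[1] + " newFragment=" + isNewFrag);
    }
    // the third token breaks on size, the fourth on the increment gap
  }
}
```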
+/** Orders Tokens in a window first by their startOffset ascending.
+ * endOffset is currently ignored.
+ * This is meant to work around fickleness in the highlighter only.  It
+ * can mess up token positions and should not be used for indexing or querying.
+ */
+class TokenOrderingFilter extends TokenFilter {
+  private final int windowSize;
+  private final LinkedList<Token> queue = new LinkedList<Token>();
+  private boolean done=false;
+
+  protected TokenOrderingFilter(TokenStream input, int windowSize) {
+    super(input);
+    this.windowSize = windowSize;
+  }
+
+  public Token next() throws IOException {
+    while (!done && queue.size() < windowSize) {
+      Token newTok = input.next();
+      if (newTok==null) {
+        done=true;
+        break;
+      }
+
+      // reverse iterating for better efficiency since we know the
+      // list is already sorted, and most token start offsets will be too.
+      ListIterator<Token> iter = queue.listIterator(queue.size());
+      while(iter.hasPrevious()) {
+        if (newTok.startOffset() >= iter.previous().startOffset()) {
+          // insertion will be before what next() would return (what
+          // we just compared against), so move back one so the insertion
+          // will be after.
+          iter.next();
+          break;
+        }
+      }
+      iter.add(newTok);
+    }
+
+    return queue.isEmpty() ? null : queue.removeFirst();
+  }
+}
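TokenOrderingFilter buffers up to windowSize tokens in a sorted list and always emits the smallest start offset first, so a token arriving at most windowSize-1 positions out of order gets repaired. A toy version over plain ints showing both the repair and its limit (the 1 below arrives too late to be re-ordered):

```java
import java.util.LinkedList;
import java.util.ListIterator;

// Toy version of TokenOrderingFilter's bounded-window insertion sort, with
// ints standing in for token start offsets: keep a window of windowSize
// items sorted, emit the smallest once the window is full.
public class WindowSortDemo {
  public static void main(String[] args) {
    int[] offsets = { 5, 3, 9, 7, 1, 11 }; // slightly out-of-order arrivals
    int windowSize = 3;
    LinkedList<Integer> queue = new LinkedList<Integer>();
    int i = 0;
    StringBuilder out = new StringBuilder();
    while (true) {
      // fill the window, inserting each arrival in sorted position
      while (i < offsets.length && queue.size() < windowSize) {
        int tok = offsets[i++];
        // reverse iteration, as in TokenOrderingFilter.next()
        ListIterator<Integer> iter = queue.listIterator(queue.size());
        while (iter.hasPrevious()) {
          if (tok >= iter.previous()) {
            iter.next(); // step back so the insertion lands after it
            break;
          }
        }
        iter.add(tok);
      }
      if (queue.isEmpty()) break;
      out.append(queue.removeFirst()).append(' ');
    }
    System.out.println(out); // 3 5 1 7 9 11  (the 1 arrived too late to fix)
  }
}
```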
SolrPluginUtils.java:

@@ -18,15 +18,11 @@
 package org.apache.solr.util;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.highlight.*;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrException;
 import org.apache.solr.request.SolrParams;
@@ -849,156 +845,5 @@ public class SolrPluginUtils {
 
   }
 
-/**
- * Helper class which creates a single TokenStream out of values from a
- * multi-valued field.
- */
-class MultiValueTokenStream extends TokenStream {
-  private String fieldName;
-  private String[] values;
-  private Analyzer analyzer;
-  private int curIndex;                  // next index into the values array
-  private int curOffset;                 // offset into concatenated string
-  private TokenStream currentStream;     // tokenStream currently being iterated
-  private boolean orderTokenOffsets;
-
-  /** Constructs a TokenStream for consecutively-analyzed field values
-   *
-   * @param fieldName name of the field
-   * @param values array of field data
-   * @param analyzer analyzer instance
-   */
-  public MultiValueTokenStream(String fieldName, String[] values,
-                               Analyzer analyzer, boolean orderTokenOffsets) {
-    this.fieldName = fieldName;
-    this.values = values;
-    this.analyzer = analyzer;
-    curIndex = -1;
-    curOffset = 0;
-    currentStream = null;
-    this.orderTokenOffsets = orderTokenOffsets;
-  }
-
-  /** Returns the next token in the stream, or null at EOS. */
-  public Token next() throws IOException {
-    int extra = 0;
-    if(currentStream == null) {
-      curIndex++;
-      if(curIndex < values.length) {
-        currentStream = analyzer.tokenStream(fieldName,
-                                             new StringReader(values[curIndex]));
-        if (orderTokenOffsets) currentStream = new TokenOrderingFilter(currentStream,10);
-        // add extra space between multiple values
-        if(curIndex > 0)
-          extra = analyzer.getPositionIncrementGap(fieldName);
-      } else {
-        return null;
-      }
-    }
-    Token nextToken = currentStream.next();
-    if(nextToken == null) {
-      curOffset += values[curIndex].length();
-      currentStream = null;
-      return next();
-    }
-    // create an modified token which is the offset into the concatenated
-    // string of all values
-    Token offsetToken = new Token(nextToken.termText(),
-                                  nextToken.startOffset() + curOffset,
-                                  nextToken.endOffset() + curOffset);
-    offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10);
-    return offsetToken;
-  }
-
-  /**
-   * Returns all values as a single String into which the Tokens index with
-   * their offsets.
-   */
-  public String asSingleValue() {
-    StringBuilder sb = new StringBuilder();
-    for(String str : values)
-      sb.append(str);
-    return sb.toString();
-  }
-
-}
-
-/**
- * A simple modification of SimpleFragmenter which additionally creates new
- * fragments when an unusually-large position increment is encountered
- * (this behaves much better in the presence of multi-valued fields).
- */
-class GapFragmenter extends SimpleFragmenter {
-  public static final int INCREMENT_THRESHOLD = 50;
-  protected int fragOffsetAccum = 0;
-
-  public GapFragmenter() {
-  }
-
-  public GapFragmenter(int fragsize) {
-    super(fragsize);
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
-   */
-  public void start(String originalText) {
-    fragOffsetAccum = 0;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
-   */
-  public boolean isNewFragment(Token token) {
-    boolean isNewFrag =
-      token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
-      token.getPositionIncrement() > INCREMENT_THRESHOLD;
-    if(isNewFrag) {
-      fragOffsetAccum += token.endOffset() - fragOffsetAccum;
-    }
-    return isNewFrag;
-  }
-}
-
-
-/** Orders Tokens in a window first by their startOffset ascending.
- * endOffset is currently ignored.
- * This is meant to work around fickleness in the highlighter only.  It
- * can mess up token positions and should not be used for indexing or querying.
- */
-class TokenOrderingFilter extends TokenFilter {
-  private final int windowSize;
-  private final LinkedList<Token> queue = new LinkedList<Token>();
-  private boolean done=false;
-
-  protected TokenOrderingFilter(TokenStream input, int windowSize) {
-    super(input);
-    this.windowSize = windowSize;
-  }
-
-  public Token next() throws IOException {
-    while (!done && queue.size() < windowSize) {
-      Token newTok = input.next();
-      if (newTok==null) {
-        done=true;
-        break;
-      }
-
-      // reverse iterating for better efficiency since we know the
-      // list is already sorted, and most token start offsets will be too.
-      ListIterator<Token> iter = queue.listIterator(queue.size());
-      while(iter.hasPrevious()) {
-        if (newTok.startOffset() >= iter.previous().startOffset()) {
-          // insertion will be before what next() would return (what
-          // we just compared against), so move back one so the insertion
-          // will be after.
-          iter.next();
-          break;
-        }
-      }
-      iter.add(newTok);
-    }
-
-    return queue.isEmpty() ? null : queue.removeFirst();
-  }
-}
 