LUCENE-1660: make enablePositionIncrement required up-front arg

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@781506 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2009-06-03 18:51:25 +00:00
parent 42693ff056
commit c91e76d0a5
6 changed files with 271 additions and 51 deletions

CHANGES.txt

@@ -169,6 +169,11 @@ API Changes
Instead, when sorting by field, the application should explicitly
state the type of the field. (Mike McCandless)
17. LUCENE-1660: StopFilter, StandardAnalyzer, StopAnalyzer now
require up-front specification of enablePositionIncrement (Mike
McCandless)
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
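For illustration only (not part of the commit): a minimal before/after sketch of the API change in entry 17, assuming a WhitespaceTokenizer over an arbitrary Reader and a caller-supplied stop word array.

import java.io.Reader;

import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

class StopFilterMigrationSketch {
  // Deprecated form: position-increment handling silently comes from the
  // static ENABLE_POSITION_INCREMENTS_DEFAULT.
  TokenStream before(Reader reader, String[] stopWords) {
    return new StopFilter(new WhitespaceTokenizer(reader), stopWords);
  }

  // New form: the caller states up front whether increments should account
  // for the removed stop words.
  TokenStream after(Reader reader, String[] stopWords) {
    return new StopFilter(true, new WhitespaceTokenizer(reader), stopWords);
  }
}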

StopAnalyzer.java

@@ -26,6 +26,9 @@ import java.util.Set;
public final class StopAnalyzer extends Analyzer {
private Set stopWords;
// @deprecated
private boolean useDefaultStopPositionIncrement;
private boolean enablePositionIncrements;
/** An array containing some common English words that are not usually useful
for searching. */
@@ -37,39 +40,99 @@ public final class StopAnalyzer extends Analyzer {
"they", "this", "to", "was", "will", "with"
};
/** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */
/** Builds an analyzer which removes words in
* ENGLISH_STOP_WORDS.
* @deprecated Use {@link #StopAnalyzer(boolean)} instead */
public StopAnalyzer() {
stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
useDefaultStopPositionIncrement = true;
}
/** Builds an analyzer which removes words in
* ENGLISH_STOP_WORDS.
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(boolean enablePositionIncrements) {
stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
this.enablePositionIncrements = enablePositionIncrements;
}
/** Builds an analyzer with the stop words from the given set.
*/
* @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
public StopAnalyzer(Set stopWords) {
this.stopWords = stopWords;
useDefaultStopPositionIncrement = true;
}
/** Builds an analyzer which removes words in the provided array. */
/** Builds an analyzer with the stop words from the given set.
* @param stopWords Set of stop words
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Set stopWords, boolean enablePositionIncrements) {
this.stopWords = stopWords;
this.enablePositionIncrements = enablePositionIncrements;
}
/** Builds an analyzer which removes words in the provided array.
* @deprecated Use {@link #StopAnalyzer(String[], boolean)} instead */
public StopAnalyzer(String[] stopWords) {
this.stopWords = StopFilter.makeStopSet(stopWords);
useDefaultStopPositionIncrement = true;
}
/** Builds an analyzer which removes words in the provided array.
* @param stopWords Array of stop words
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(String[] stopWords, boolean enablePositionIncrements) {
this.stopWords = StopFilter.makeStopSet(stopWords);
this.enablePositionIncrements = enablePositionIncrements;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
*/
* @deprecated Use {@link #StopAnalyzer(File, boolean)} instead */
public StopAnalyzer(File stopwordsFile) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwordsFile);
useDefaultStopPositionIncrement = true;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param stopwordsFile File to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwordsFile);
this.enablePositionIncrements = enablePositionIncrements;
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @deprecated Use {@link #StopAnalyzer(Reader, boolean)} instead
*/
public StopAnalyzer(Reader stopwords) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwords);
useDefaultStopPositionIncrement = true;
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwords);
this.enablePositionIncrements = enablePositionIncrements;
}
/** Filters LowerCaseTokenizer with StopFilter. */
public TokenStream tokenStream(String fieldName, Reader reader) {
return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
if (useDefaultStopPositionIncrement) {
return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
} else {
return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
}
}
/** Filters LowerCaseTokenizer with StopFilter. */
@@ -82,7 +145,11 @@ public final class StopAnalyzer extends Analyzer {
if (streams == null) {
streams = new SavedStreams();
streams.source = new LowerCaseTokenizer(reader);
streams.result = new StopFilter(streams.source, stopWords);
if (useDefaultStopPositionIncrement) {
streams.result = new StopFilter(streams.source, stopWords);
} else {
streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
}
setPreviousTokenStream(streams);
} else
streams.source.reset(reader);
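For illustration only (not part of the commit): a minimal sketch of the new StopAnalyzer constructor; the field name and input text are arbitrary.

import java.io.StringReader;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;

class StopAnalyzerUsageSketch {
  TokenStream analyze(String text) {
    // true: preserve the positions of removed stop words, which loses no
    // positional information at indexing time.
    StopAnalyzer analyzer = new StopAnalyzer(true);
    return analyzer.tokenStream("body", new StringReader(text));
  }
}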

StopFilter.java

@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
/**
* Removes stop words from a token stream.
@@ -31,6 +32,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public final class StopFilter extends TokenFilter {
// deprecated
private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
private final CharArraySet stopWords;
@@ -41,19 +43,45 @@ public final class StopFilter extends TokenFilter {
/**
* Construct a token stream filtering the given input.
* @deprecated Use {@link #StopFilter(boolean, TokenStream, String[])} instead
*/
public StopFilter(TokenStream input, String [] stopWords)
{
this(input, stopWords, false);
this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false);
}
/**
* Construct a token stream filtering the given input.
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param input input TokenStream
* @param stopWords array of stop words
*/
public StopFilter(boolean enablePositionIncrements, TokenStream input, String [] stopWords)
{
this(enablePositionIncrements, input, stopWords, false);
}
/**
* Constructs a filter which removes words from the input
* TokenStream that are named in the array of words.
* @deprecated Use {@link #StopFilter(boolean, TokenStream, String[], boolean)} instead
*/
public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, ignoreCase);
}
/**
* Constructs a filter which removes words from the input
* TokenStream that are named in the array of words.
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param in input TokenStream
* @param stopWords array of stop words
* @param ignoreCase true if case is ignored
*/
public StopFilter(boolean enablePositionIncrements, TokenStream in, String[] stopWords, boolean ignoreCase) {
super(in);
this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase);
this.enablePositionIncrements = enablePositionIncrements;
init();
}
@@ -72,8 +100,30 @@ public final class StopFilter extends TokenFilter {
* @param input
* @param stopWords The set of Stop Words.
* @param ignoreCase If true, ignore case when stopping.
* @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead
*/
public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase)
{
this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase);
}
/**
* Construct a token stream filtering the given input.
* If <code>stopWords</code> is an instance of {@link CharArraySet} (true if
* <code>makeStopSet()</code> was used to construct the set) it will be directly used
* and <code>ignoreCase</code> will be ignored since <code>CharArraySet</code>
* directly controls case sensitivity.
* <p/>
* If <code>stopWords</code> is not an instance of {@link CharArraySet},
* a new CharArraySet will be constructed and <code>ignoreCase</code> will be
* used to specify the case sensitivity of that set.
*
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param input Input TokenStream
* @param stopWords The set of Stop Words.
* @param ignoreCase If true, ignore case when stopping.
*/
public StopFilter(boolean enablePositionIncrements, TokenStream input, Set stopWords, boolean ignoreCase)
{
super(input);
if (stopWords instanceof CharArraySet) {
@@ -82,6 +132,7 @@ public final class StopFilter extends TokenFilter {
this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
this.stopWords.addAll(stopWords);
}
this.enablePositionIncrements = enablePositionIncrements;
init();
}
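As an illustrative sketch (not part of the commit) of the CharArraySet note in the constructor javadoc above: when the stop set comes from makeStopSet, case handling is already baked into the set, so the constructor's ignoreCase flag is not consulted.

import java.io.StringReader;
import java.util.Set;

import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

class StopSetSketch {
  TokenStream filter(String text) {
    // makeStopSet returns a CharArraySet; case-insensitivity is decided here.
    Set stopSet = StopFilter.makeStopSet(new String[] {"the", "a", "an"}, true);
    // The trailing false is ignored because stopSet is already a CharArraySet.
    return new StopFilter(true, new WhitespaceTokenizer(new StringReader(text)), stopSet, false);
  }
}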
@@ -90,9 +141,23 @@ public final class StopFilter extends TokenFilter {
* TokenStream that are named in the Set.
*
* @see #makeStopSet(java.lang.String[])
* @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead
*/
public StopFilter(TokenStream in, Set stopWords) {
this(in, stopWords, false);
this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, false);
}
/**
* Constructs a filter which removes words from the input
* TokenStream that are named in the Set.
*
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param in Input stream
* @param stopWords The set of Stop Words.
* @see #makeStopSet(java.lang.String[])
*/
public StopFilter(boolean enablePositionIncrements, TokenStream in, Set stopWords) {
this(enablePositionIncrements, in, stopWords, false);
}
public void init() {
@@ -190,6 +255,7 @@ public final class StopFilter extends TokenFilter {
/**
* @see #setEnablePositionIncrementsDefault(boolean).
* @deprecated Please specify this when you create the StopFilter
*/
public static boolean getEnablePositionIncrementsDefault() {
return ENABLE_POSITION_INCREMENTS_DEFAULT;
@@ -205,6 +271,7 @@ public final class StopFilter extends TokenFilter {
* <p>
* Default : false.
* @see #setEnablePositionIncrements(boolean).
* @deprecated Please specify this when you create the StopFilter
*/
public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
@@ -218,12 +285,20 @@ public final class StopFilter extends TokenFilter {
}
/**
* Set to <code>true</code> to make <b>this</b> StopFilter enable position increments to result tokens.
* <p>
* When set, when a token is stopped (omitted), the position increment of
* the following token is incremented.
* <p>
* Default: see {@link #setEnablePositionIncrementsDefault(boolean)}.
* If <code>true</code>, this StopFilter will preserve
* positions of the incoming tokens (ie, accumulate and
* set position increments of the removed stop tokens).
* Generally, <code>true</code> is best as it does not
* lose information (positions of the original tokens)
* during indexing.
*
* <p> When set, when a token is stopped
* (omitted), the position increment of the following
* token is incremented.
*
* <p> <b>NOTE</b>: be sure to also
* set {@link QueryParser#setEnablePositionIncrements} if
* you use QueryParser to create queries.
*/
public void setEnablePositionIncrements(boolean enable) {
this.enablePositionIncrements = enable;
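For illustration only (not part of the commit), following the NOTE above: a sketch that keeps the analyzer and QueryParser consistent about position increments; the field name and stop word are arbitrary.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;

class PositionIncrementQuerySketch {
  Query parse(String queryText) throws ParseException {
    Analyzer analyzer = new Analyzer() {
      final WhitespaceAnalyzer base = new WhitespaceAnalyzer();
      public TokenStream tokenStream(String fieldName, Reader reader) {
        // Preserve the gaps left by removed stop words.
        return new StopFilter(true, base.tokenStream(fieldName, reader), new String[] {"stop"});
      }
    };
    // The parser must also honor position increments so phrase queries
    // line up with the indexed positions.
    QueryParser qp = new QueryParser("field", analyzer);
    qp.setEnablePositionIncrements(true);
    return qp.parse(queryText);
  }
}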

StandardAnalyzer.java

@@ -44,6 +44,10 @@ public class StandardAnalyzer extends Analyzer {
private boolean replaceInvalidAcronym = defaultReplaceInvalidAcronym;
private static boolean defaultReplaceInvalidAcronym;
private boolean enableStopPositionIncrements;
// @deprecated
private boolean useDefaultStopPositionIncrements;
// Default to true (fixed the bug), unless the system prop is set
static {
@@ -88,33 +92,89 @@ public class StandardAnalyzer extends Analyzer {
useful for searching. */
public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
/** Builds an analyzer with the default stop words ({@link #STOP_WORDS}). */
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS}).
* @deprecated Use {@link #StandardAnalyzer(boolean, String[])},
* passing in null for the stop words, instead */
public StandardAnalyzer() {
this(STOP_WORDS);
}
/** Builds an analyzer with the given stop words. */
/** Builds an analyzer with the given stop words.
* @deprecated Use {@link #StandardAnalyzer(boolean, Set)}
* instead */
public StandardAnalyzer(Set stopWords) {
stopSet = stopWords;
useDefaultStopPositionIncrements = true;
}
/** Builds an analyzer with the given stop words. */
/** Builds an analyzer with the given stop words.
* @param enableStopPositionIncrements See {@link
* StopFilter#setEnablePositionIncrements}
* @param stopWords stop words */
public StandardAnalyzer(boolean enableStopPositionIncrements, Set stopWords) {
stopSet = stopWords;
this.enableStopPositionIncrements = enableStopPositionIncrements;
}
/** Builds an analyzer with the given stop words.
* @deprecated Use {@link #StandardAnalyzer(boolean,
* String[])} instead */
public StandardAnalyzer(String[] stopWords) {
if (stopWords == null) {
stopWords = STOP_WORDS;
}
stopSet = StopFilter.makeStopSet(stopWords);
useDefaultStopPositionIncrements = true;
}
/** Builds an analyzer with the given stop words.
* @param enableStopPositionIncrements See {@link
* StopFilter#setEnablePositionIncrements}
* @param stopWords Array of stop words */
public StandardAnalyzer(boolean enableStopPositionIncrements, String[] stopWords) {
stopSet = StopFilter.makeStopSet(stopWords);
this.enableStopPositionIncrements = enableStopPositionIncrements;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @deprecated Use {@link #StandardAnalyzer(boolean, File)}
* instead
*/
public StandardAnalyzer(File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
useDefaultStopPositionIncrements = true;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param enableStopPositionIncrements See {@link
* StopFilter#setEnablePositionIncrements}
* @param stopwords File to read stop words from */
public StandardAnalyzer(boolean enableStopPositionIncrements, File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
this.enableStopPositionIncrements = enableStopPositionIncrements;
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @deprecated Use {@link #StandardAnalyzer(boolean, Reader)}
* instead
*/
public StandardAnalyzer(Reader stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
useDefaultStopPositionIncrements = true;
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param enableStopPositionIncrements See {@link
* StopFilter#setEnablePositionIncrements}
* @param stopwords Reader to read stop words from */
public StandardAnalyzer(boolean enableStopPositionIncrements, Reader stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
this.enableStopPositionIncrements = enableStopPositionIncrements;
}
/**
@@ -128,6 +188,7 @@ public class StandardAnalyzer extends Analyzer {
public StandardAnalyzer(boolean replaceInvalidAcronym) {
this(STOP_WORDS);
this.replaceInvalidAcronym = replaceInvalidAcronym;
useDefaultStopPositionIncrements = true;
}
/**
@@ -141,6 +202,7 @@ public class StandardAnalyzer extends Analyzer {
public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
useDefaultStopPositionIncrements = true;
}
/**
@@ -154,6 +216,7 @@ public class StandardAnalyzer extends Analyzer {
public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
useDefaultStopPositionIncrements = true;
}
/**
@@ -168,6 +231,7 @@ public class StandardAnalyzer extends Analyzer {
public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
useDefaultStopPositionIncrements = true;
}
/**
@@ -181,6 +245,7 @@ public class StandardAnalyzer extends Analyzer {
public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
useDefaultStopPositionIncrements = true;
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -190,7 +255,11 @@ public class StandardAnalyzer extends Analyzer {
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
result = new StopFilter(result, stopSet);
if (useDefaultStopPositionIncrements) {
result = new StopFilter(result, stopSet);
} else {
result = new StopFilter(enableStopPositionIncrements, result, stopSet);
}
return result;
}
@@ -229,7 +298,11 @@ public class StandardAnalyzer extends Analyzer {
streams.tokenStream = new StandardTokenizer(reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
if (useDefaultStopPositionIncrements) {
streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
} else {
streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
}
} else {
streams.tokenStream.reset(reader);
}
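For illustration only (not part of the commit): a sketch of the new StandardAnalyzer constructors; the custom stop word list is an arbitrary example.

import java.util.Set;

import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

class StandardAnalyzerSketch {
  StandardAnalyzer withDefaultStopWords() {
    // Default stop words, preserving positions of removed stop words.
    return new StandardAnalyzer(true, StandardAnalyzer.STOP_WORDS);
  }

  StandardAnalyzer withCustomStopWords() {
    Set stopSet = StopFilter.makeStopSet(new String[] {"the", "and", "of"});
    return new StandardAnalyzer(true, stopSet);
  }
}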

TestStopFilter.java

@@ -36,7 +36,7 @@ public class TestStopFilter extends LuceneTestCase {
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords);
TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -48,7 +48,7 @@ public class TestStopFilter extends LuceneTestCase {
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords, true);
TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, true);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -59,7 +59,7 @@ public class TestStopFilter extends LuceneTestCase {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set stopSet = StopFilter.makeStopSet(stopWords);
TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -85,11 +85,11 @@ public class TestStopFilter extends LuceneTestCase {
Set stopSet = StopFilter.makeStopSet(stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
StopFilter stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters
ArrayList a0 = new ArrayList();
@@ -108,9 +108,9 @@ public class TestStopFilter extends LuceneTestCase {
Set stopSet0 = StopFilter.makeStopSet(stopWords0);
Set stopSet1 = StopFilter.makeStopSet(stopWords1);
reader = new StringReader(sb.toString());
StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);
StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true);
}

TestPositionIncrement.java

@@ -186,17 +186,9 @@ public class TestPositionIncrement extends LuceneTestCase {
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// analyzer to introduce stopwords and increment gaps
Analyzer stpa = new Analyzer() {
final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = a.tokenStream(fieldName,reader);
return new StopFilter(ts,new String[]{"stop"});
}
};
// should not find "1 2" because there is a gap of 1 in the index
QueryParser qp = new QueryParser("field",stpa);
QueryParser qp = new QueryParser("field",
new StopWhitespaceAnalyzer(false));
q = (PhraseQuery) qp.parse("\"1 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
@@ -212,22 +204,30 @@ public class TestPositionIncrement extends LuceneTestCase {
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
try {
// stop filter alone won't help, because query parser swallows the increments.
qp.setEnablePositionIncrements(false);
StopFilter.setEnablePositionIncrementsDefault(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// stop filter alone won't help, because query parser swallows the increments.
qp.setEnablePositionIncrements(false);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// when both qp and stopFilter propagate increments, we should find the doc.
qp.setEnablePositionIncrements(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
} finally {
StopFilter.setEnablePositionIncrementsDefault(dflt);
// when both qp and stopFilter propagate increments, we should find the doc.
qp = new QueryParser("field",
new StopWhitespaceAnalyzer(true));
qp.setEnablePositionIncrements(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
}
private static class StopWhitespaceAnalyzer extends Analyzer {
boolean enablePositionIncrements;
final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
this.enablePositionIncrements = enablePositionIncrements;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = a.tokenStream(fieldName,reader);
return new StopFilter(enablePositionIncrements, ts, new String[]{"stop"});
}
}