Mirror of https://github.com/apache/lucene.git
LUCENE-1660: make enablePositionIncrement required up-front arg
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@781506 13f79535-47bb-0310-9956-ffa450edef68
parent 42693ff056
commit c91e76d0a5
CHANGES.txt
@@ -169,6 +169,11 @@ API Changes
     Instead, when sorting by field, the application should explicitly
     state the type of the field. (Mike McCandless)
 
+17. LUCENE-1660: StopFilter, StandardAnalyzer, StopAnalyzer now
+    require up front specification of enablePositionIncrement (Mike
+    McCandless)
+
 Bug fixes
 
  1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
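In practice the change means the flag that previously came from StopFilter's static default (or a setter) is now passed as the first constructor argument. A minimal sketch of post-change calling code, assuming the Lucene 2.9-era classes touched by this commit; the stop words, field text, and Reader below are made-up placeholder values:

// Sketch only (not part of the patch); assumes:
//   import java.io.StringReader;
//   import org.apache.lucene.analysis.*;
//   import org.apache.lucene.analysis.standard.StandardAnalyzer;
Analyzer stop     = new StopAnalyzer(true);                                   // was: new StopAnalyzer()
Analyzer standard = new StandardAnalyzer(true, StandardAnalyzer.STOP_WORDS);  // was: new StandardAnalyzer()
TokenStream ts    = new StopFilter(true,                                      // enablePositionIncrements, now required up front
                                   new WhitespaceTokenizer(new StringReader("now is the time")),
                                   new String[] { "is", "the" });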
StopAnalyzer.java
@@ -26,6 +26,9 @@ import java.util.Set;
 
 public final class StopAnalyzer extends Analyzer {
   private Set stopWords;
+  // @deprecated
+  private boolean useDefaultStopPositionIncrement;
+  private boolean enablePositionIncrements;
 
   /** An array containing some common English words that are not usually useful
       for searching. */
@@ -37,39 +40,99 @@ public final class StopAnalyzer extends Analyzer {
     "they", "this", "to", "was", "will", "with"
   };
 
-  /** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */
+  /** Builds an analyzer which removes words in
+   * ENGLISH_STOP_WORDS.
+   * @deprecated Use {@link #StopAnalyzer(boolean)} instead */
   public StopAnalyzer() {
     stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
+    useDefaultStopPositionIncrement = true;
   }
 
+  /** Builds an analyzer which removes words in
+   * ENGLISH_STOP_WORDS.
+   * @param enablePositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements} */
+  public StopAnalyzer(boolean enablePositionIncrements) {
+    stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
   /** Builds an analyzer with the stop words from the given set.
-   */
+   * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
   public StopAnalyzer(Set stopWords) {
     this.stopWords = stopWords;
+    useDefaultStopPositionIncrement = true;
   }
 
-  /** Builds an analyzer which removes words in the provided array. */
+  /** Builds an analyzer with the stop words from the given set.
+   * @param stopWords Set of stop words
+   * @param enablePositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements} */
+  public StopAnalyzer(Set stopWords, boolean enablePositionIncrements) {
+    this.stopWords = stopWords;
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
+  /** Builds an analyzer which removes words in the provided array.
+   * @deprecated Use {@link #StopAnalyzer(String[], boolean)} instead */
   public StopAnalyzer(String[] stopWords) {
     this.stopWords = StopFilter.makeStopSet(stopWords);
+    useDefaultStopPositionIncrement = true;
   }
 
+  /** Builds an analyzer which removes words in the provided array.
+   * @param stopWords Array of stop words
+   * @param enablePositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements} */
+  public StopAnalyzer(String[] stopWords, boolean enablePositionIncrements) {
+    this.stopWords = StopFilter.makeStopSet(stopWords);
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
   /** Builds an analyzer with the stop words from the given file.
    * @see WordlistLoader#getWordSet(File)
-   */
+   * @deprecated Use {@link #StopAnalyzer(File, boolean)} instead */
   public StopAnalyzer(File stopwordsFile) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
+    useDefaultStopPositionIncrement = true;
   }
 
+  /** Builds an analyzer with the stop words from the given file.
+   * @see WordlistLoader#getWordSet(File)
+   * @param stopwordsFile File to load stop words from
+   * @param enablePositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements} */
+  public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
+    stopWords = WordlistLoader.getWordSet(stopwordsFile);
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
   /** Builds an analyzer with the stop words from the given reader.
    * @see WordlistLoader#getWordSet(Reader)
+   * @deprecated Use {@link #StopAnalyzer(Reader, boolean)} instead
    */
   public StopAnalyzer(Reader stopwords) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
+    useDefaultStopPositionIncrement = true;
   }
 
+  /** Builds an analyzer with the stop words from the given reader.
+   * @see WordlistLoader#getWordSet(Reader)
+   * @param stopwords Reader to load stop words from
+   * @param enablePositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements} */
+  public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
+    stopWords = WordlistLoader.getWordSet(stopwords);
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
   /** Filters LowerCaseTokenizer with StopFilter. */
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
+    if (useDefaultStopPositionIncrement) {
+      return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
+    } else {
+      return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
+    }
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
@@ -82,7 +145,11 @@ public final class StopAnalyzer extends Analyzer {
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new LowerCaseTokenizer(reader);
-      streams.result = new StopFilter(streams.source, stopWords);
+      if (useDefaultStopPositionIncrement) {
+        streams.result = new StopFilter(streams.source, stopWords);
+      } else {
+        streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
+      }
       setPreviousTokenStream(streams);
     } else
       streams.source.reset(reader);
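The StopAnalyzer changes above keep the old behaviour behind the deprecated constructors (via useDefaultStopPositionIncrement) and route the new constructors through the up-front flag. A hedged usage sketch, not taken from the patch; the field name and text are placeholders, and the usual org.apache.lucene.analysis and tokenattributes imports plus java.io.StringReader are assumed:

StopAnalyzer analyzer = new StopAnalyzer(true);   // enablePositionIncrements supplied up front
TokenStream ts = analyzer.tokenStream("body", new StringReader("Now is the Time"));
TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
while (ts.incrementToken()) {
  // LowerCaseTokenizer lower-cases, StopFilter drops "is"/"the":
  // this loop prints "now" and then "time"
  System.out.println(term.term());
}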
StopFilter.java
@@ -24,6 +24,7 @@ import java.util.List;
 
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.queryParser.QueryParser; // for javadoc
 
 /**
  * Removes stop words from a token stream.
@@ -31,6 +32,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 public final class StopFilter extends TokenFilter {
 
+  // deprecated
   private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
 
   private final CharArraySet stopWords;
@@ -41,19 +43,45 @@ public final class StopFilter extends TokenFilter {
 
   /**
    * Construct a token stream filtering the given input.
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[])} instead
   */
  public StopFilter(TokenStream input, String [] stopWords)
  {
-    this(input, stopWords, false);
+    this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false);
  }
 
+  /**
+   * Construct a token stream filtering the given input.
+   * @param enablePositionIncrements true if token positions should record the removed stop words
+   * @param input input TokenStream
+   * @param stopWords array of stop words
+   */
+  public StopFilter(boolean enablePositionIncrements, TokenStream input, String [] stopWords)
+  {
+    this(enablePositionIncrements, input, stopWords, false);
+  }
+
  /**
   * Constructs a filter which removes words from the input
   * TokenStream that are named in the array of words.
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[], boolean)} instead
   */
  public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
+    this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, ignoreCase);
+  }
+
+  /**
+   * Constructs a filter which removes words from the input
+   * TokenStream that are named in the array of words.
+   * @param enablePositionIncrements true if token positions should record the removed stop words
+   * @param in input TokenStream
+   * @param stopWords array of stop words
+   * @param ignoreCase true if case is ignored
+   */
+  public StopFilter(boolean enablePositionIncrements, TokenStream in, String[] stopWords, boolean ignoreCase) {
    super(in);
    this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase);
+    this.enablePositionIncrements = enablePositionIncrements;
    init();
  }
 
@@ -72,8 +100,30 @@ public final class StopFilter extends TokenFilter {
   * @param input
   * @param stopWords The set of Stop Words.
   * @param ignoreCase -Ignore case when stopping.
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead
   */
  public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase)
  {
+    this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase);
+  }
+
+  /**
+   * Construct a token stream filtering the given input.
+   * If <code>stopWords</code> is an instance of {@link CharArraySet} (true if
+   * <code>makeStopSet()</code> was used to construct the set) it will be directly used
+   * and <code>ignoreCase</code> will be ignored since <code>CharArraySet</code>
+   * directly controls case sensitivity.
+   * <p/>
+   * If <code>stopWords</code> is not an instance of {@link CharArraySet},
+   * a new CharArraySet will be constructed and <code>ignoreCase</code> will be
+   * used to specify the case sensitivity of that set.
+   *
+   * @param enablePositionIncrements true if token positions should record the removed stop words
+   * @param input Input TokenStream
+   * @param stopWords The set of Stop Words.
+   * @param ignoreCase -Ignore case when stopping.
+   */
+  public StopFilter(boolean enablePositionIncrements, TokenStream input, Set stopWords, boolean ignoreCase)
+  {
    super(input);
    if (stopWords instanceof CharArraySet) {
@@ -82,6 +132,7 @@ public final class StopFilter extends TokenFilter {
      this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
      this.stopWords.addAll(stopWords);
    }
+    this.enablePositionIncrements = enablePositionIncrements;
    init();
  }
 
@@ -90,9 +141,23 @@ public final class StopFilter extends TokenFilter {
   * TokenStream that are named in the Set.
   *
   * @see #makeStopSet(java.lang.String[])
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead
   */
  public StopFilter(TokenStream in, Set stopWords) {
-    this(in, stopWords, false);
+    this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, false);
  }
 
+  /**
+   * Constructs a filter which removes words from the input
+   * TokenStream that are named in the Set.
+   *
+   * @param enablePositionIncrements true if token positions should record the removed stop words
+   * @param in Input stream
+   * @param stopWords The set of Stop Words.
+   * @see #makeStopSet(java.lang.String[])
+   */
+  public StopFilter(boolean enablePositionIncrements, TokenStream in, Set stopWords) {
+    this(enablePositionIncrements, in, stopWords, false);
+  }
+
  public void init() {
@@ -190,6 +255,7 @@ public final class StopFilter extends TokenFilter {
 
  /**
   * @see #setEnablePositionIncrementsDefault(boolean).
+   * @deprecated Please specify this when you create the StopFilter
   */
  public static boolean getEnablePositionIncrementsDefault() {
    return ENABLE_POSITION_INCREMENTS_DEFAULT;
@@ -205,6 +271,7 @@ public final class StopFilter extends TokenFilter {
   * <p>
   * Default : false.
   * @see #setEnablePositionIncrements(boolean).
+   * @deprecated Please specify this when you create the StopFilter
   */
  public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
    ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
@@ -218,12 +285,20 @@ public final class StopFilter extends TokenFilter {
  }
 
  /**
-   * Set to <code>true</code> to make <b>this</b> StopFilter enable position increments to result tokens.
-   * <p>
-   * When set, when a token is stopped (omitted), the position increment of
-   * the following token is incremented.
-   * <p>
-   * Default: see {@link #setEnablePositionIncrementsDefault(boolean)}.
+   * If <code>true</code>, this StopFilter will preserve
+   * positions of the incoming tokens (ie, accumulate and
+   * set position increments of the removed stop tokens).
+   * Generally, <code>true</code> is best as it does not
+   * lose information (positions of the original tokens)
+   * during indexing.
+   *
+   * <p> When set, when a token is stopped
+   * (omitted), the position increment of the following
+   * token is incremented.
+   *
+   * <p> <b>NOTE</b>: be sure to also
+   * set {@link QueryParser#setEnablePositionIncrements} if
+   * you use QueryParser to create queries.
   */
  public void setEnablePositionIncrements(boolean enable) {
    this.enablePositionIncrements = enable;
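Taken together, the StopFilter changes make the (boolean, TokenStream, ...) constructors the primary ones and leave the old signatures delegating through ENABLE_POSITION_INCREMENTS_DEFAULT. A small sketch of the non-deprecated forms, with the QueryParser reminder from the new setEnablePositionIncrements javadoc folded in; the text, field name, and stop words are placeholders, and the usual analysis, standard, and queryParser imports are assumed:

Set stopSet = StopFilter.makeStopSet(new String[] { "is", "the", "a" });
TokenStream ts = new StopFilter(true,                       // preserve positions of removed stop words
                                new WhitespaceTokenizer(new StringReader("this is a test")),
                                stopSet);

// Per the new javadoc: if QueryParser builds the queries, enable increments there too,
// otherwise phrase queries over stopped positions will not match as expected.
QueryParser qp = new QueryParser("body", new StandardAnalyzer(true, StandardAnalyzer.STOP_WORDS));
qp.setEnablePositionIncrements(true);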
StandardAnalyzer.java
@@ -44,6 +44,10 @@ public class StandardAnalyzer extends Analyzer {
   private boolean replaceInvalidAcronym = defaultReplaceInvalidAcronym;
 
   private static boolean defaultReplaceInvalidAcronym;
+  private boolean enableStopPositionIncrements;
+
+  // @deprecated
+  private boolean useDefaultStopPositionIncrements;
 
   // Default to true (fixed the bug), unless the system prop is set
   static {
@@ -88,33 +92,89 @@ public class StandardAnalyzer extends Analyzer {
   useful for searching. */
   public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
 
-  /** Builds an analyzer with the default stop words ({@link #STOP_WORDS}). */
+  /** Builds an analyzer with the default stop words ({@link
+   * #STOP_WORDS}).
+   * @deprecated Use {@link #StandardAnalyzer(boolean, String[])},
+   * passing in null for the stop words, instead */
   public StandardAnalyzer() {
     this(STOP_WORDS);
   }
 
-  /** Builds an analyzer with the given stop words. */
+  /** Builds an analyzer with the given stop words.
+   * @deprecated Use {@link #StandardAnalyzer(boolean, Set)}
+   * instead */
   public StandardAnalyzer(Set stopWords) {
     stopSet = stopWords;
+    useDefaultStopPositionIncrements = true;
   }
 
-  /** Builds an analyzer with the given stop words. */
+  /** Builds an analyzer with the given stop words.
+   * @param enableStopPositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements}
+   * @param stopWords stop words */
+  public StandardAnalyzer(boolean enableStopPositionIncrements, Set stopWords) {
+    stopSet = stopWords;
+    this.enableStopPositionIncrements = enableStopPositionIncrements;
+  }
+
+  /** Builds an analyzer with the given stop words.
+   * @deprecated Use {@link #StandardAnalyzer(boolean,
+   * String[])} instead */
   public StandardAnalyzer(String[] stopWords) {
+    if (stopWords == null) {
+      stopWords = STOP_WORDS;
+    }
     stopSet = StopFilter.makeStopSet(stopWords);
+    useDefaultStopPositionIncrements = true;
   }
 
+  /** Builds an analyzer with the given stop words.
+   * @param enableStopPositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements}
+   * @param stopWords Array of stop words */
+  public StandardAnalyzer(boolean enableStopPositionIncrements, String[] stopWords) {
+    stopSet = StopFilter.makeStopSet(stopWords);
+    this.enableStopPositionIncrements = enableStopPositionIncrements;
+  }
+
   /** Builds an analyzer with the stop words from the given file.
    * @see WordlistLoader#getWordSet(File)
+   * @deprecated Use {@link #StandardAnalyzer(boolean, File)}
+   * instead
    */
   public StandardAnalyzer(File stopwords) throws IOException {
     stopSet = WordlistLoader.getWordSet(stopwords);
+    useDefaultStopPositionIncrements = true;
   }
 
+  /** Builds an analyzer with the stop words from the given file.
+   * @see WordlistLoader#getWordSet(File)
+   * @param enableStopPositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements}
+   * @param stopwords File to read stop words from */
+  public StandardAnalyzer(boolean enableStopPositionIncrements, File stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+    this.enableStopPositionIncrements = enableStopPositionIncrements;
+  }
+
   /** Builds an analyzer with the stop words from the given reader.
    * @see WordlistLoader#getWordSet(Reader)
+   * @deprecated Use {@link #StandardAnalyzer(boolean, Reader)}
+   * instead
   */
  public StandardAnalyzer(Reader stopwords) throws IOException {
    stopSet = WordlistLoader.getWordSet(stopwords);
+    useDefaultStopPositionIncrements = true;
  }
 
+  /** Builds an analyzer with the stop words from the given reader.
+   * @see WordlistLoader#getWordSet(Reader)
+   * @param enableStopPositionIncrements See {@link
+   * StopFilter#setEnablePositionIncrements}
+   * @param stopwords Reader to read stop words from */
+  public StandardAnalyzer(boolean enableStopPositionIncrements, Reader stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+    this.enableStopPositionIncrements = enableStopPositionIncrements;
+  }
+
  /**
@@ -128,6 +188,7 @@ public class StandardAnalyzer extends Analyzer {
  public StandardAnalyzer(boolean replaceInvalidAcronym) {
    this(STOP_WORDS);
    this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
  }
 
  /**
@@ -141,6 +202,7 @@ public class StandardAnalyzer extends Analyzer {
  public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
    this(stopwords);
    this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
  }
 
  /**
@@ -154,6 +216,7 @@ public class StandardAnalyzer extends Analyzer {
  public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
    this(stopwords);
    this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
  }
 
  /**
@@ -168,6 +231,7 @@ public class StandardAnalyzer extends Analyzer {
  public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
    this(stopwords);
    this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
  }
 
  /**
@@ -181,6 +245,7 @@ public class StandardAnalyzer extends Analyzer {
  public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
    this(stopwords);
    this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
  }
 
  /** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -190,7 +255,11 @@ public class StandardAnalyzer extends Analyzer {
    tokenStream.setMaxTokenLength(maxTokenLength);
    TokenStream result = new StandardFilter(tokenStream);
    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, stopSet);
+    if (useDefaultStopPositionIncrements) {
+      result = new StopFilter(result, stopSet);
+    } else {
+      result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+    }
    return result;
  }
 
@@ -229,7 +298,11 @@ public class StandardAnalyzer extends Analyzer {
      streams.tokenStream = new StandardTokenizer(reader);
      streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
      streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
-      streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+      if (useDefaultStopPositionIncrements) {
+        streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+      } else {
+        streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
+      }
    } else {
      streams.tokenStream.reset(reader);
    }
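StandardAnalyzer follows the same pattern: every deprecated constructor records useDefaultStopPositionIncrements, and the new (boolean, ...) overloads take the flag directly. A hedged sketch of the new overloads; the file name is hypothetical, the File/Reader variants must run in code that declares throws IOException, and java.io.File plus the analyzer imports are assumed:

Analyzer a1 = new StandardAnalyzer(true, StandardAnalyzer.STOP_WORDS);    // default stop words, increments on
Analyzer a2 = new StandardAnalyzer(true, new File("my-stopwords.txt"));   // stop words loaded from a file
Analyzer legacy = new StandardAnalyzer();                                 // still compiles, now deprecated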
TestStopFilter.java
@@ -36,7 +36,7 @@ public class TestStopFilter extends LuceneTestCase {
   public void testExactCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     String[] stopWords = new String[] { "is", "the", "Time" };
-    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords);
+    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords);
     final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -48,7 +48,7 @@ public class TestStopFilter extends LuceneTestCase {
   public void testIgnoreCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     String[] stopWords = new String[] { "is", "the", "Time" };
-    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords, true);
+    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, true);
     final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -59,7 +59,7 @@ public class TestStopFilter extends LuceneTestCase {
     StringReader reader = new StringReader("Now is The Time");
     String[] stopWords = new String[] { "is", "the", "Time" };
     Set stopSet = StopFilter.makeStopSet(stopWords);
-    TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+    TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
     final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
     assertTrue(stream.incrementToken());
     assertEquals("Now", termAtt.term());
@@ -85,11 +85,11 @@ public class TestStopFilter extends LuceneTestCase {
     Set stopSet = StopFilter.makeStopSet(stopWords);
     // with increments
     StringReader reader = new StringReader(sb.toString());
-    StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+    StopFilter stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
     doTestStopPositons(stpf,true);
     // without increments
     reader = new StringReader(sb.toString());
-    stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+    stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
     doTestStopPositons(stpf,false);
     // with increments, concatenating two stop filters
     ArrayList a0 = new ArrayList();
@@ -108,9 +108,9 @@ public class TestStopFilter extends LuceneTestCase {
     Set stopSet0 = StopFilter.makeStopSet(stopWords0);
     Set stopSet1 = StopFilter.makeStopSet(stopWords1);
     reader = new StringReader(sb.toString());
-    StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
+    StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
     stpf0.setEnablePositionIncrements(true);
-    StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
+    StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1); // two stop filters concatenated!
     doTestStopPositons(stpf01,true);
   }
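The updated tests simply pass the flag explicitly and then let doTestStopPositons check the resulting increments either way. For orientation, a sketch (not from the test file) of the values the flag controls, assuming a whitespace-tokenized input and the same imports the test above uses:

StringReader reader = new StringReader("now is the time");
TokenStream ts = new StopFilter(true,                         // fold removed stop words into the next increment
                                new WhitespaceTokenizer(reader),
                                StopFilter.makeStopSet(new String[] { "is", "the" }));
TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
PositionIncrementAttribute posIncr =
    (PositionIncrementAttribute) ts.getAttribute(PositionIncrementAttribute.class);
ts.incrementToken();   // "now"  -> getPositionIncrement() == 1
ts.incrementToken();   // "time" -> getPositionIncrement() == 3 (the stopped "is" and "the" are counted)
                       // with the flag set to false, "time" would report 1 instead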
TestPositionIncrement.java
@@ -186,17 +186,9 @@ public class TestPositionIncrement extends LuceneTestCase {
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
 
-    // analyzer to introduce stopwords and increment gaps
-    Analyzer stpa = new Analyzer() {
-      final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        TokenStream ts = a.tokenStream(fieldName,reader);
-        return new StopFilter(ts,new String[]{"stop"});
-      }
-    };
-
     // should not find "1 2" because there is a gap of 1 in the index
-    QueryParser qp = new QueryParser("field",stpa);
+    QueryParser qp = new QueryParser("field",
+                                     new StopWhitespaceAnalyzer(false));
     q = (PhraseQuery) qp.parse("\"1 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
@@ -212,22 +204,30 @@ public class TestPositionIncrement extends LuceneTestCase {
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
 
-    boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
-    try {
-      // stop filter alone won't help, because query parser swallows the increments.
-      qp.setEnablePositionIncrements(false);
-      StopFilter.setEnablePositionIncrementsDefault(true);
-      q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-      hits = searcher.search(q, null, 1000).scoreDocs;
-      assertEquals(0, hits.length);
+    // stop filter alone won't help, because query parser swallows the increments.
+    qp.setEnablePositionIncrements(false);
+    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+    hits = searcher.search(q, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
 
-      // when both qp qnd stopFilter propagate increments, we should find the doc.
-      qp.setEnablePositionIncrements(true);
-      q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-      hits = searcher.search(q, null, 1000).scoreDocs;
-      assertEquals(1, hits.length);
-    } finally {
-      StopFilter.setEnablePositionIncrementsDefault(dflt);
+    // when both qp qnd stopFilter propagate increments, we should find the doc.
+    qp = new QueryParser("field",
+                         new StopWhitespaceAnalyzer(true));
+    qp.setEnablePositionIncrements(true);
+    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+    hits = searcher.search(q, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
   }
 
+  private static class StopWhitespaceAnalyzer extends Analyzer {
+    boolean enablePositionIncrements;
+    final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
+    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
+      this.enablePositionIncrements = enablePositionIncrements;
+    }
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      TokenStream ts = a.tokenStream(fieldName,reader);
+      return new StopFilter(enablePositionIncrements, ts, new String[]{"stop"});
+    }
+  }