mirror of https://github.com/apache/lucene.git
LUCENE-1253: LengthFilter (and Solr's KeepWordTokenFilter) now require up front specification of enablePositionIncrement. Together with StopFilter they have a common base class (FilteringTokenFilter) that handles the position increments automatically. Implementors only need to override an accept() method that filters tokens
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1065343 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
277dfa0e88
commit
e7088279f7
|
@ -643,6 +643,12 @@ API Changes
|
||||||
deletes remain buffered so that the next time you open an NRT reader
|
deletes remain buffered so that the next time you open an NRT reader
|
||||||
and pass true, all deletes will be applied. (Mike McCandless)
|
and pass true, all deletes will be applied. (Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-1253: LengthFilter (and Solr's KeepWordFilter) now
|
||||||
|
require up-front specification of enablePositionIncrements. Together with
|
||||||
|
StopFilter they have a common base class (FilteringTokenFilter) that handles
|
||||||
|
the position increments automatically. Implementors only need to override an
|
||||||
|
accept() method that filters tokens. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
|
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
|
||||||
|
|
|
@ -22,10 +22,9 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
@ -42,14 +41,10 @@ import org.apache.lucene.util.Version;
|
||||||
* increments are preserved
|
* increments are preserved
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public final class StopFilter extends TokenFilter {
|
public final class StopFilter extends FilteringTokenFilter {
|
||||||
|
|
||||||
private final CharArraySet stopWords;
|
private final CharArraySet stopWords;
|
||||||
private boolean enablePositionIncrements = true;
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a token stream filtering the given input. If
|
* Construct a token stream filtering the given input. If
|
||||||
|
@ -75,7 +70,7 @@ public final class StopFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
|
public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
|
||||||
{
|
{
|
||||||
super(input);
|
super(true, input);
|
||||||
this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
|
this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,48 +152,8 @@ public final class StopFilter extends TokenFilter {
|
||||||
* Returns the next input Token whose term() is not a stop word.
|
* Returns the next input Token whose term() is not a stop word.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
protected boolean accept() throws IOException {
|
||||||
// return the first non-stop word found
|
return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
|
||||||
int skippedPositions = 0;
|
|
||||||
while (input.incrementToken()) {
|
|
||||||
if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) {
|
|
||||||
if (enablePositionIncrements) {
|
|
||||||
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
skippedPositions += posIncrAtt.getPositionIncrement();
|
|
||||||
}
|
|
||||||
// reached EOS -- return false
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @see #setEnablePositionIncrements(boolean)
|
|
||||||
*/
|
|
||||||
public boolean getEnablePositionIncrements() {
|
|
||||||
return enablePositionIncrements;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If <code>true</code>, this StopFilter will preserve
|
|
||||||
* positions of the incoming tokens (ie, accumulate and
|
|
||||||
* set position increments of the removed stop tokens).
|
|
||||||
* Generally, <code>true</code> is best as it does not
|
|
||||||
* lose information (positions of the original tokens)
|
|
||||||
* during indexing.
|
|
||||||
*
|
|
||||||
* Default is true.
|
|
||||||
*
|
|
||||||
* <p> When set, when a token is stopped
|
|
||||||
* (omitted), the position increment of the following
|
|
||||||
* token is incremented.
|
|
||||||
*
|
|
||||||
* <p> <b>NOTE</b>: be sure to also
|
|
||||||
* set {@link QueryParser#setEnablePositionIncrements} if
|
|
||||||
* you use QueryParser to create queries.
|
|
||||||
*/
|
|
||||||
public void setEnablePositionIncrements(boolean enable) {
|
|
||||||
this.enablePositionIncrements = enable;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
|
|
||||||
|
@ -30,22 +31,19 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
*
|
*
|
||||||
* @since solr 1.3
|
* @since solr 1.3
|
||||||
*/
|
*/
|
||||||
public final class KeepWordFilter extends TokenFilter {
|
public final class KeepWordFilter extends FilteringTokenFilter {
|
||||||
private final CharArraySet words;
|
private final CharArraySet words;
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
/** The words set passed to this constructor will be directly used by this filter
|
/** The words set passed to this constructor will be directly used by this filter
|
||||||
* and should not be modified. */
|
* and should not be modified. */
|
||||||
public KeepWordFilter(TokenStream in, CharArraySet words) {
|
public KeepWordFilter(boolean enablePositionIncrements, TokenStream in, CharArraySet words) {
|
||||||
super(in);
|
super(enablePositionIncrements, in);
|
||||||
this.words = words;
|
this.words = words;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean accept() throws IOException {
|
||||||
while (input.incrementToken()) {
|
return words.contains(termAtt.buffer(), 0, termAtt.length());
|
||||||
if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -29,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* Note: Length is calculated as the number of UTF-16 code units.
|
* Note: Length is calculated as the number of UTF-16 code units.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public final class LengthFilter extends TokenFilter {
|
public final class LengthFilter extends FilteringTokenFilter {
|
||||||
|
|
||||||
private final int min;
|
private final int min;
|
||||||
private final int max;
|
private final int max;
|
||||||
|
@ -40,27 +41,15 @@ public final class LengthFilter extends TokenFilter {
|
||||||
* Build a filter that removes words that are too long or too
|
* Build a filter that removes words that are too long or too
|
||||||
* short from the text.
|
* short from the text.
|
||||||
*/
|
*/
|
||||||
public LengthFilter(TokenStream in, int min, int max)
|
public LengthFilter(boolean enablePositionIncrements, TokenStream in, int min, int max) {
|
||||||
{
|
super(enablePositionIncrements, in);
|
||||||
super(in);
|
|
||||||
this.min = min;
|
this.min = min;
|
||||||
this.max = max;
|
this.max = max;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next input Token whose term() is the right len
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public final boolean incrementToken() throws IOException {
|
public boolean accept() throws IOException {
|
||||||
// return the first non-stop word found
|
final int len = termAtt.length();
|
||||||
while (input.incrementToken()) {
|
return (len >= min && len <= max);
|
||||||
int len = termAtt.length();
|
|
||||||
if (len >= min && len <= max) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// note: else we ignore it but should we index each part of it?
|
|
||||||
}
|
|
||||||
// reached EOS -- return false
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
package org.apache.lucene.analysis.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.queryParser.QueryParser; // for javadoc
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract base class for TokenFilters that may remove tokens.
|
||||||
|
* You have to implement {@link #accept} and return <code>true</code> if the current
|
||||||
|
* token should be preserved. {@link #incrementToken} uses this method
|
||||||
|
* to decide if a token should be passed to the caller.
|
||||||
|
*/
|
||||||
|
public abstract class FilteringTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
|
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value!
|
||||||
|
|
||||||
|
public FilteringTokenFilter(boolean enablePositionIncrements, TokenStream input){
|
||||||
|
super(input);
|
||||||
|
this.enablePositionIncrements = enablePositionIncrements;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Override this method and return <code>true</code> if the current input token should be returned by {@link #incrementToken}. */
|
||||||
|
protected abstract boolean accept() throws IOException;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean incrementToken() throws IOException {
|
||||||
|
if (enablePositionIncrements) {
|
||||||
|
int skippedPositions = 0;
|
||||||
|
while (input.incrementToken()) {
|
||||||
|
if (accept()) {
|
||||||
|
if (skippedPositions != 0) {
|
||||||
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
skippedPositions += posIncrAtt.getPositionIncrement();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (input.incrementToken()) {
|
||||||
|
if (accept()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// reached EOS -- return false
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #setEnablePositionIncrements(boolean)
|
||||||
|
*/
|
||||||
|
public boolean getEnablePositionIncrements() {
|
||||||
|
return enablePositionIncrements;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If <code>true</code>, this TokenFilter will preserve
|
||||||
|
* positions of the incoming tokens (ie, accumulate and
|
||||||
|
* set position increments of the removed tokens).
|
||||||
|
* Generally, <code>true</code> is best as it does not
|
||||||
|
* lose information (positions of the original tokens)
|
||||||
|
* during indexing.
|
||||||
|
*
|
||||||
|
* <p> When set, if a token is removed
|
||||||
|
* (omitted), the position increment of the following
|
||||||
|
* token is incremented.
|
||||||
|
*
|
||||||
|
* <p> <b>NOTE</b>: be sure to also
|
||||||
|
* set {@link QueryParser#setEnablePositionIncrements} if
|
||||||
|
* you use QueryParser to create queries.
|
||||||
|
*/
|
||||||
|
public void setEnablePositionIncrements(boolean enable) {
|
||||||
|
this.enablePositionIncrements = enable;
|
||||||
|
}
|
||||||
|
}
|
|
@ -35,16 +35,26 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
|
||||||
words.add( "aaa" );
|
words.add( "aaa" );
|
||||||
words.add( "bbb" );
|
words.add( "bbb" );
|
||||||
|
|
||||||
String input = "aaa BBB ccc ddd EEE";
|
String input = "xxx yyy aaa zzz BBB ccc ddd EEE";
|
||||||
|
|
||||||
// Test Stopwords
|
// Test Stopwords
|
||||||
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||||
stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
|
stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
|
||||||
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
|
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
|
||||||
|
|
||||||
// Now force case
|
// Now force case
|
||||||
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||||
stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
|
stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
|
||||||
assertTokenStreamContents(stream, new String[] { "aaa" });
|
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
|
||||||
|
|
||||||
|
// Test Stopwords
|
||||||
|
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||||
|
stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
|
||||||
|
assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
|
||||||
|
|
||||||
|
// Now force case
|
||||||
|
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||||
|
stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
|
||||||
|
assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,19 +24,24 @@ import java.io.StringReader;
|
||||||
|
|
||||||
public class TestLengthFilter extends BaseTokenStreamTestCase {
|
public class TestLengthFilter extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testFilter() throws Exception {
|
public void testFilterNoPosIncr() throws Exception {
|
||||||
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||||
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
|
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
|
||||||
LengthFilter filter = new LengthFilter(stream, 2, 6);
|
LengthFilter filter = new LengthFilter(false, stream, 2, 6);
|
||||||
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
assertTokenStreamContents(filter,
|
||||||
|
new String[]{"short", "ab", "foo"},
|
||||||
|
new int[]{1, 1, 1}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
assertTrue(filter.incrementToken());
|
public void testFilterWithPosIncr() throws Exception {
|
||||||
assertEquals("short", termAtt.toString());
|
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||||
assertTrue(filter.incrementToken());
|
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
|
||||||
assertEquals("ab", termAtt.toString());
|
LengthFilter filter = new LengthFilter(true, stream, 2, 6);
|
||||||
assertTrue(filter.incrementToken());
|
assertTokenStreamContents(filter,
|
||||||
assertEquals("foo", termAtt.toString());
|
new String[]{"short", "ab", "foo"},
|
||||||
assertFalse(filter.incrementToken());
|
new int[]{1, 4, 2}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,22 +23,27 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
|
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
* @since solr 1.3
|
|
||||||
*/
|
*/
|
||||||
public class KeepWordFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
public class KeepWordFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||||
|
|
||||||
private CharArraySet words;
|
@Override
|
||||||
private boolean ignoreCase;
|
public void init(Map<String,String> args) {
|
||||||
|
super.init(args);
|
||||||
|
assureMatchVersion();
|
||||||
|
}
|
||||||
|
|
||||||
public void inform(ResourceLoader loader) {
|
public void inform(ResourceLoader loader) {
|
||||||
String wordFiles = args.get("words");
|
String wordFiles = args.get("words");
|
||||||
ignoreCase = getBoolean("ignoreCase", false);
|
ignoreCase = getBoolean("ignoreCase", false);
|
||||||
if (wordFiles != null) {
|
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
|
||||||
|
|
||||||
|
if (wordFiles != null) {
|
||||||
try {
|
try {
|
||||||
words = getWordSet(loader, wordFiles, ignoreCase);
|
words = getWordSet(loader, wordFiles, ignoreCase);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -47,6 +52,10 @@ public class KeepWordFilterFactory extends BaseTokenFilterFactory implements Res
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private CharArraySet words;
|
||||||
|
private boolean ignoreCase;
|
||||||
|
private boolean enablePositionIncrements;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the keep word list.
|
* Set the keep word list.
|
||||||
* NOTE: if ignoreCase==true, the words are expected to be lowercase
|
* NOTE: if ignoreCase==true, the words are expected to be lowercase
|
||||||
|
@ -62,15 +71,19 @@ public class KeepWordFilterFactory extends BaseTokenFilterFactory implements Res
|
||||||
this.ignoreCase = ignoreCase;
|
this.ignoreCase = ignoreCase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public KeepWordFilter create(TokenStream input) {
|
public boolean isEnablePositionIncrements() {
|
||||||
return new KeepWordFilter(input, words);
|
return enablePositionIncrements;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isIgnoreCase() {
|
||||||
|
return ignoreCase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CharArraySet getWords() {
|
public CharArraySet getWords() {
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isIgnoreCase() {
|
public KeepWordFilter create(TokenStream input) {
|
||||||
return ignoreCase;
|
return new KeepWordFilter(enablePositionIncrements, input, words);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class LengthFilterFactory extends BaseTokenFilterFactory {
|
public class LengthFilterFactory extends BaseTokenFilterFactory {
|
||||||
int min,max;
|
int min,max;
|
||||||
|
boolean enablePositionIncrements;
|
||||||
public static final String MIN_KEY = "min";
|
public static final String MIN_KEY = "min";
|
||||||
public static final String MAX_KEY = "max";
|
public static final String MAX_KEY = "max";
|
||||||
|
|
||||||
|
@ -35,8 +36,10 @@ public class LengthFilterFactory extends BaseTokenFilterFactory {
|
||||||
super.init(args);
|
super.init(args);
|
||||||
min=Integer.parseInt(args.get(MIN_KEY));
|
min=Integer.parseInt(args.get(MIN_KEY));
|
||||||
max=Integer.parseInt(args.get(MAX_KEY));
|
max=Integer.parseInt(args.get(MAX_KEY));
|
||||||
|
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LengthFilter create(TokenStream input) {
|
public LengthFilter create(TokenStream input) {
|
||||||
return new LengthFilter(input,min,max);
|
return new LengthFilter(enablePositionIncrements, input,min,max);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,9 +31,19 @@ public class LengthFilterTest extends BaseTokenTestCase {
|
||||||
Map<String, String> args = new HashMap<String, String>();
|
Map<String, String> args = new HashMap<String, String>();
|
||||||
args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
|
args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
|
||||||
args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
|
args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
|
||||||
|
// default: args.put("enablePositionIncrements", "false");
|
||||||
factory.init(args);
|
factory.init(args);
|
||||||
String test = "foo foobar super-duper-trooper";
|
String test = "foo foobar super-duper-trooper";
|
||||||
TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test)));
|
TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test)));
|
||||||
assertTokenStreamContents(stream, new String[] { "foobar" });
|
assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
|
||||||
|
|
||||||
|
factory = new LengthFilterFactory();
|
||||||
|
args = new HashMap<String, String>();
|
||||||
|
args.put(LengthFilterFactory.MIN_KEY, String.valueOf(4));
|
||||||
|
args.put(LengthFilterFactory.MAX_KEY, String.valueOf(10));
|
||||||
|
args.put("enablePositionIncrements", "true");
|
||||||
|
factory.init(args);
|
||||||
|
stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(test)));
|
||||||
|
assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue