mirror of https://github.com/apache/lucene.git
LUCENE-4963: Completely remove deprecated options in 5.0.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1479171 13f79535-47bb-0310-9956-ffa450edef68
parent 8a7f2b6cc4
commit fa76c30147
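The change is mechanical for API users: every analysis component that accepted an enablePositionIncrements flag (and TrimFilter's updateOffsets flag) loses that constructor, factory parameter, getter, and setter, since skipping tokens without leaving a position gap creates broken token streams. A minimal before/after sketch — not part of the commit itself; the tokenizer choice and stopword set are illustrative:

import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

public class StopFilterMigration {
  public static TokenStream build(Reader reader) {
    TokenStream ts = new StandardTokenizer(Version.LUCENE_44, reader);
    // Before 4.4, callers could additionally write:
    //   StopFilter sf = new StopFilter(version, ts, stopWords);
    //   sf.setEnablePositionIncrements(false);  // option removed by this commit
    // On trunk the setter no longer exists; skipped stopwords always
    // leave a position gap for the following token:
    return new StopFilter(Version.LUCENE_44, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  }
}

Solr configurations migrate the same way: delete the enablePositionIncrements attribute from the <filter> elements, exactly as the schema hunks at the end of this diff do.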

@@ -33,7 +33,7 @@ import java.io.IOException;
  * <analyzer>
  *   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  *   <filter class="solr.StopFilterFactory" ignoreCase="true"
- *           words="stopwords.txt" enablePositionIncrements="true"/>
+ *           words="stopwords.txt"/>
  * </analyzer>
  * </fieldType></pre>
  */
@@ -42,7 +42,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
   private final String stopWordFiles;
   private final String format;
   private final boolean ignoreCase;
-  private final boolean enablePositionIncrements;
 
   /** Creates a new StopFilterFactory */
   public StopFilterFactory(Map<String,String> args) {
@@ -51,7 +50,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
     stopWordFiles = get(args, "words");
     format = get(args, "format");
     ignoreCase = getBoolean(args, "ignoreCase", false);
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -70,10 +68,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
     }
   }
 
-  public boolean isEnablePositionIncrements() {
-    return enablePositionIncrements;
-  }
-
   public boolean isIgnoreCase() {
     return ignoreCase;
   }
@@ -85,7 +79,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
   @Override
   public TokenStream create(TokenStream input) {
     StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
-    stopFilter.setEnablePositionIncrements(enablePositionIncrements);
     return stopFilter;
   }
 }

@@ -33,20 +33,6 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
   private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
   private final boolean useWhiteList;
 
-  /** @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. */
-  @Deprecated
-  public TypeTokenFilter(Version version, boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
-    super(version, enablePositionIncrements, input);
-    this.stopTypes = stopTypes;
-    this.useWhiteList = useWhiteList;
-  }
-
-  /** @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. */
-  @Deprecated
-  public TypeTokenFilter(Version version, boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes) {
-    this(version, enablePositionIncrements, input, stopTypes, false);
-  }
-
   /**
    * Create a new {@link TypeTokenFilter}.
    * @param version the Lucene match version

@@ -41,7 +41,6 @@ import java.util.Set;
  */
 public class TypeTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   private final boolean useWhitelist;
-  private final boolean enablePositionIncrements;
   private final String stopTypesFiles;
   private Set<String> stopTypes;
 
@@ -49,7 +48,6 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour
   public TypeTokenFilterFactory(Map<String,String> args) {
     super(args);
     stopTypesFiles = require(args, "types");
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     useWhitelist = getBoolean(args, "useWhitelist", false);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -68,18 +66,13 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour
     }
   }
 
-  public boolean isEnablePositionIncrements() {
-    return enablePositionIncrements;
-  }
-
   public Set<String> getStopTypes() {
     return stopTypes;
   }
 
   @Override
   public TokenStream create(TokenStream input) {
-    @SuppressWarnings("deprecation")
-    final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, enablePositionIncrements, input, stopTypes, useWhitelist);
+    final TokenStream filter = new TypeTokenFilter(luceneMatchVersion, input, stopTypes, useWhitelist);
     return filter;
   }
 }

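For direct users of TypeTokenFilter, only the version/stream/types constructors survive. A sketch of the whitelist=false (blacklist) form, mirroring the call the factory now makes; the reader text and the "<NUM>" type (StandardTokenizer's type for numeric tokens, as in the tests below) are illustrative:

import java.io.StringReader;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.TypeTokenFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

public class TypeTokenFilterSketch {
  public static TokenStream dropNumbers(StringReader reader) {
    Set<String> stopTypes = Collections.singleton("<NUM>");
    TokenStream ts = new StandardTokenizer(Version.LUCENE_44, reader);
    // useWhiteList=false: listed types are dropped, everything else is kept,
    // and the dropped tokens' position increments carry over to the next token.
    return new TypeTokenFilter(Version.LUCENE_44, ts, stopTypes, false);
  }
}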
@@ -137,11 +137,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
-    StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
-    if (!matchVersion.onOrAfter(Version.LUCENE_44)) {
-      s.setEnablePositionIncrements(false);
-    }
-    result = s;
+    result = new StopFilter(matchVersion, result, HYPHENATIONS);
     result = new ElisionFilter(result, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     result = new StopFilter(matchVersion, result, stopwords);

@@ -33,13 +33,6 @@ public final class KeepWordFilter extends FilteringTokenFilter {
   private final CharArraySet words;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
-  /** @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. */
-  @Deprecated
-  public KeepWordFilter(Version version, boolean enablePositionIncrements, TokenStream in, CharArraySet words) {
-    super(version, enablePositionIncrements, in);
-    this.words = words;
-  }
-
   /**
    * Create a new {@link KeepWordFilter}.
    * <p><b>NOTE</b>: The words set passed to this constructor will be directly

@@ -38,7 +38,6 @@ import java.io.IOException;
  */
 public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   private final boolean ignoreCase;
-  private final boolean enablePositionIncrements;
   private final String wordFiles;
   private CharArraySet words;
 
@@ -48,7 +47,6 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
     assureMatchVersion();
     wordFiles = get(args, "words");
     ignoreCase = getBoolean(args, "ignoreCase", false);
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -61,10 +59,6 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
     }
   }
 
-  public boolean isEnablePositionIncrements() {
-    return enablePositionIncrements;
-  }
-
   public boolean isIgnoreCase() {
     return ignoreCase;
   }
@@ -79,8 +73,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
     if (words == null) {
       return input;
     } else {
-      @SuppressWarnings("deprecation")
-      final TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+      final TokenStream filter = new KeepWordFilter(luceneMatchVersion, input, words);
       return filter;
     }
   }

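Because each factory drains its known keys from the args map and then rejects whatever is left, configurations that still carry the removed parameter now fail fast at construction instead of being silently honored. A sketch assuming direct construction of the factory with a hand-built args map; the luceneMatchVersion key and the file name are assumptions for illustration:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory;

public class RemovedParamSketch {
  public static void main(String[] args) {
    Map<String, String> params = new HashMap<String, String>();
    params.put("luceneMatchVersion", "LUCENE_44"); // required by assureMatchVersion()
    params.put("words", "keepwords.txt");
    params.put("enablePositionIncrements", "false"); // no longer consumed
    try {
      new KeepWordFilterFactory(params);
    } catch (IllegalArgumentException expected) {
      // thrown by the "Unknown parameters: " check seen in the hunk above
    }
  }
}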
@@ -35,14 +35,6 @@ public final class LengthFilter extends FilteringTokenFilter {
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
-  /** @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. */
-  @Deprecated
-  public LengthFilter(Version version, boolean enablePositionIncrements, TokenStream in, int min, int max) {
-    super(version, enablePositionIncrements, in);
-    this.min = min;
-    this.max = max;
-  }
-
   /**
    * Create a new {@link LengthFilter}. This will filter out tokens whose
    * {@link CharTermAttribute} is either too short ({@link CharTermAttribute#length()}

@@ -35,7 +35,6 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 public class LengthFilterFactory extends TokenFilterFactory {
   final int min;
   final int max;
-  final boolean enablePositionIncrements;
   public static final String MIN_KEY = "min";
   public static final String MAX_KEY = "max";
 
@@ -44,7 +43,6 @@ public class LengthFilterFactory extends TokenFilterFactory {
     super(args);
     min = requireInt(args, MIN_KEY);
     max = requireInt(args, MAX_KEY);
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -52,8 +50,7 @@ public class LengthFilterFactory extends TokenFilterFactory {
 
   @Override
   public LengthFilter create(TokenStream input) {
-    @SuppressWarnings("deprecation")
-    final LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input,min,max);
+    final LengthFilter filter = new LengthFilter(luceneMatchVersion, input,min,max);
    return filter;
   }
 }

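Direct LengthFilter users migrate the same way as the factory: drop the boolean, and the match version keeps its place as the first argument. A sketch with an assumed WhitespaceTokenizer source and illustrative bounds:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.util.Version;

public class LengthFilterSketch {
  public static TokenStream keepMidLength(StringReader reader) {
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_44, reader);
    // Tokens shorter than 2 or longer than 6 chars are dropped; the
    // position increment of the next kept token absorbs the gap.
    return new LengthFilter(Version.LUCENE_44, ts, 2, 6);
  }
}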
@@ -32,29 +32,15 @@ import java.io.IOException;
  */
 public final class TrimFilter extends TokenFilter {
 
-  final boolean updateOffsets;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   /**
    * Create a new {@link TrimFilter}.
    * @param version the Lucene match version
    * @param in the stream to consume
-   * @param updateOffsets whether to update offsets
-   * @deprecated Offset updates are not supported anymore as of Lucene 4.4.
    */
-  @Deprecated
-  public TrimFilter(Version version, TokenStream in, boolean updateOffsets) {
-    super(in);
-    if (updateOffsets && version.onOrAfter(Version.LUCENE_44)) {
-      throw new IllegalArgumentException("updateOffsets=true is not supported anymore as of Lucene 4.4");
-    }
-    this.updateOffsets = updateOffsets;
-  }
-
-  /** Create a new {@link TrimFilter} on top of <code>in</code>. */
   public TrimFilter(Version version, TokenStream in) {
-    this(version, in, false);
+    super(in);
   }
 
   @Override
@@ -70,14 +56,12 @@ public final class TrimFilter extends TokenFilter {
     }
     int start = 0;
     int end = 0;
-    int endOff = 0;
 
     // eat the first characters
     for (start = 0; start < len && Character.isWhitespace(termBuffer[start]); start++) {
     }
     // eat the end characters
     for (end = len; end >= start && Character.isWhitespace(termBuffer[end - 1]); end--) {
-      endOff++;
     }
     if (start > 0 || end < len) {
       if (start < end) {
@@ -85,11 +69,6 @@ public final class TrimFilter extends TokenFilter {
       } else {
         termAtt.setEmpty();
       }
-      if (updateOffsets && len == offsetAtt.endOffset() - offsetAtt.startOffset()) {
-        int newStart = offsetAtt.startOffset()+start;
-        int newEnd = offsetAtt.endOffset() - (start<end ? endOff:0);
-        offsetAtt.setOffset(newStart, newEnd);
-      }
     }
 
     return true;

@@ -37,12 +37,9 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  */
 public class TrimFilterFactory extends TokenFilterFactory {
 
-  protected final boolean updateOffsets;
-
   /** Creates a new TrimFilterFactory */
   public TrimFilterFactory(Map<String,String> args) {
     super(args);
-    updateOffsets = getBoolean(args, "updateOffsets", false);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -50,8 +47,7 @@ public class TrimFilterFactory extends TokenFilterFactory {
 
   @Override
   public TrimFilter create(TokenStream input) {
-    @SuppressWarnings("deprecation")
-    final TrimFilter filter = new TrimFilter(luceneMatchVersion, input, updateOffsets);
+    final TrimFilter filter = new TrimFilter(luceneMatchVersion, input);
     return filter;
   }
 }

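TrimFilter keeps only the two-argument constructor: term text is trimmed, offsets are never rewritten, and the old updateOffsets knob is gone. A sketch; KeywordTokenizer is chosen so the whole input becomes one trimmable token:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.util.Version;

public class TrimFilterSketch {
  public static TokenStream trim(StringReader reader) {
    TokenStream ts = new KeywordTokenizer(reader);
    // Leading/trailing whitespace is removed from the term; start and end
    // offsets keep pointing at the original, untrimmed text.
    return new TrimFilter(Version.LUCENE_44, ts);
  }
}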
@@ -29,35 +29,11 @@ import org.apache.lucene.util.Version;
  * You have to implement {@link #accept} and return a boolean if the current
  * token should be preserved. {@link #incrementToken} uses this method
  * to decide if a token should be passed to the caller.
- * <p><a name="version" />As of Lucene 4.4, an {@link IllegalArgumentException}
- * is thrown when trying to disable position increments when filtering terms.
  */
 public abstract class FilteringTokenFilter extends TokenFilter {
 
-  private static void checkPositionIncrement(Version version, boolean enablePositionIncrements) {
-    if (!enablePositionIncrements && version.onOrAfter(Version.LUCENE_44)) {
-      throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4 as it can create broken token streams");
-    }
-  }
-
   protected final Version version;
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value!
-  private boolean first = true;
-
-  /**
-   * Create a new {@link FilteringTokenFilter}.
-   * @param version the Lucene match <a href="#version">version</a>
-   * @param enablePositionIncrements whether to increment position increments when filtering out terms
-   * @param input the input to consume
-   * @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4
-   */
-  @Deprecated
-  public FilteringTokenFilter(Version version, boolean enablePositionIncrements, TokenStream input){
-    this(version, input);
-    checkPositionIncrement(version, enablePositionIncrements);
-    this.enablePositionIncrements = enablePositionIncrements;
-  }
 
   /**
    * Create a new {@link FilteringTokenFilter}.
@@ -67,7 +43,6 @@ public abstract class FilteringTokenFilter extends TokenFilter {
   public FilteringTokenFilter(Version version, TokenStream in) {
     super(in);
     this.version = version;
-    this.enablePositionIncrements = true;
   }
 
   /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */
@@ -75,31 +50,17 @@ public abstract class FilteringTokenFilter extends TokenFilter {
 
   @Override
   public final boolean incrementToken() throws IOException {
-    if (enablePositionIncrements) {
-      int skippedPositions = 0;
-      while (input.incrementToken()) {
-        if (accept()) {
-          if (skippedPositions != 0) {
-            posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
-          }
-          return true;
-        }
-        skippedPositions += posIncrAtt.getPositionIncrement();
-      }
-    } else {
-      while (input.incrementToken()) {
-        if (accept()) {
-          if (first) {
-            // first token having posinc=0 is illegal.
-            if (posIncrAtt.getPositionIncrement() == 0) {
-              posIncrAtt.setPositionIncrement(1);
-            }
-            first = false;
-          }
-          return true;
-        }
-      }
-    }
+    int skippedPositions = 0;
+    while (input.incrementToken()) {
+      if (accept()) {
+        if (skippedPositions != 0) {
+          posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+        }
+        return true;
+      }
+      skippedPositions += posIncrAtt.getPositionIncrement();
+    }
 
     // reached EOS -- return false
     return false;
@@ -107,36 +68,6 @@ public abstract class FilteringTokenFilter extends TokenFilter {
   @Override
   public void reset() throws IOException {
     super.reset();
-    first = true;
   }
 
-  /**
-   * @see #setEnablePositionIncrements(boolean)
-   */
-  public boolean getEnablePositionIncrements() {
-    return enablePositionIncrements;
-  }
-
-  /**
-   * If <code>true</code>, this TokenFilter will preserve
-   * positions of the incoming tokens (ie, accumulate and
-   * set position increments of the removed tokens).
-   * Generally, <code>true</code> is best as it does not
-   * lose information (positions of the original tokens)
-   * during indexing.
-   *
-   * <p> When set, when a token is stopped
-   * (omitted), the position increment of the following
-   * token is incremented.
-   *
-   * <p> <b>NOTE</b>: be sure to also
-   * set org.apache.lucene.queryparser.classic.QueryParser#setEnablePositionIncrements if
-   * you use QueryParser to create queries.
-   * @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4
-   */
-  @Deprecated
-  public void setEnablePositionIncrements(boolean enable) {
-    checkPositionIncrement(version, enable);
-    this.enablePositionIncrements = enable;
-  }
 }

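The simplification above is the heart of the commit: FilteringTokenFilter now always accumulates the increments of skipped tokens into the next accepted token, so a phrase query still sees the gap, and subclasses only implement accept(). A hypothetical subclass for illustration (not in Lucene):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.util.Version;

public final class DropShortTokensFilter extends FilteringTokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public DropShortTokensFilter(Version version, TokenStream in) {
    super(version, in); // the only constructor left after this commit
  }

  @Override
  protected boolean accept() throws IOException {
    // Keep tokens of 3+ chars; skipped tokens widen the next kept token's
    // position increment via the incrementToken() logic shown above.
    return termAtt.length() >= 3;
  }
}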
@@ -17,17 +17,13 @@ package org.apache.lucene.analysis.core;
  */
 
 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
@@ -72,11 +68,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     // with increments
     StringReader reader = new StringReader(sb.toString());
     StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
-    doTestStopPositons(stpf,true);
-    // without increments
-    reader = new StringReader(sb.toString());
-    stpf = new StopFilter(Version.LUCENE_43, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
-    doTestStopPositons(stpf,false);
+    doTestStopPositons(stpf);
     // with increments, concatenating two stop filters
     ArrayList<String> a0 = new ArrayList<String>();
     ArrayList<String> a1 = new ArrayList<String>();
@@ -95,14 +87,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     CharArraySet stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
     reader = new StringReader(sb.toString());
     StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
-    stpf0.setEnablePositionIncrements(true);
     StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
-    doTestStopPositons(stpf01,true);
+    doTestStopPositons(stpf01);
   }
 
-  private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
-    log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
-    stpf.setEnablePositionIncrements(enableIcrements);
+  private void doTestStopPositons(StopFilter stpf) throws IOException {
     CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
     stpf.reset();
@@ -111,7 +100,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
       log("Token "+i+": "+stpf);
       String w = English.intToEnglish(i).trim();
       assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
-      assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
+      assertEquals("all but first token must have position increment of 3",i==0?1:3,posIncrAtt.getPositionIncrement());
     }
     assertFalse(stpf.incrementToken());
     stpf.end();
@@ -159,21 +148,5 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
       bufferedState = null;
     }
   }
 
-  public void testFirstPosInc() throws Exception {
-    Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenFilter filter = new MockSynonymFilter(tokenizer);
-        StopFilter stopfilter = new StopFilter(Version.LUCENE_43, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-        stopfilter.setEnablePositionIncrements(false);
-        return new TokenStreamComponents(tokenizer, stopfilter);
-      }
-    };
-
-    assertAnalyzesTo(analyzer, "the quick brown fox",
-        new String[] { "hte", "quick", "brown", "fox" },
-        new int[] { 1, 1, 1, 1} );
-  }
-
 }

@@ -17,6 +17,11 @@ package org.apache.lucene.analysis.core;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.Set;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -24,12 +29,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.English;
-import org.apache.lucene.util.Version;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Collections;
-import java.util.Set;
 
 
 public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
@@ -37,7 +36,7 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
   public void testTypeFilter() throws IOException {
     StringReader reader = new StringReader("121 is palindrome, while 123 is not");
     Set<String> stopTypes = asSet("<NUM>");
-    TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, true, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopTypes);
+    TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopTypes);
     assertTokenStreamContents(stream, new String[]{"is", "palindrome", "while", "is", "not"});
   }
 
@@ -63,11 +62,6 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
     TypeTokenFilter typeTokenFilter = new TypeTokenFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
     testPositons(typeTokenFilter);
 
-    // without increments
-    reader = new StringReader(sb.toString());
-    typeTokenFilter = new TypeTokenFilter(Version.LUCENE_43, false, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
-    testPositons(typeTokenFilter);
-
   }
 
   private void testPositons(TypeTokenFilter stpf) throws IOException {
@@ -75,11 +69,10 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
     CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
     stpf.reset();
-    boolean enablePositionIncrements = stpf.getEnablePositionIncrements();
     while (stpf.incrementToken()) {
       log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncrAtt.getPositionIncrement());
       assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
-          posIncrAtt.getPositionIncrement(), enablePositionIncrements ? 3 : 1);
+          posIncrAtt.getPositionIncrement(), 3);
     }
     stpf.end();
     stpf.close();

@@ -30,27 +30,22 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testInform() throws Exception {
     TypeTokenFilterFactory factory = (TypeTokenFilterFactory) tokenFilterFactory("Type",
-        "types", "stoptypes-1.txt",
-        "enablePositionIncrements", "true");
+        "types", "stoptypes-1.txt");
     Set<String> types = factory.getStopTypes();
     assertTrue("types is null and it shouldn't be", types != null);
     assertTrue("types Size: " + types.size() + " is not: " + 2, types.size() == 2);
-    assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());
 
     factory = (TypeTokenFilterFactory) tokenFilterFactory("Type",
         "types", "stoptypes-1.txt, stoptypes-2.txt",
-        "enablePositionIncrements", "false",
         "useWhitelist", "true");
     types = factory.getStopTypes();
     assertTrue("types is null and it shouldn't be", types != null);
     assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
-    assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
   }
 
   public void testCreationWithBlackList() throws Exception {
     TokenFilterFactory factory = tokenFilterFactory("Type",
-        "types", "stoptypes-1.txt, stoptypes-2.txt",
-        "enablePositionIncrements", "true");
+        "types", "stoptypes-1.txt, stoptypes-2.txt");
     NumericTokenStream input = new NumericTokenStream();
     input.setIntValue(123);
     factory.create(input);
@@ -59,7 +54,6 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamFactoryTestCase {
   public void testCreationWithWhiteList() throws Exception {
     TokenFilterFactory factory = tokenFilterFactory("Type",
         "types", "stoptypes-1.txt, stoptypes-2.txt",
-        "enablePositionIncrements", "true",
         "useWhitelist", "true");
     NumericTokenStream input = new NumericTokenStream();
     input.setIntValue(123);
@@ -68,7 +62,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testMissingTypesParameter() throws Exception {
     try {
-      tokenFilterFactory("Type", "enablePositionIncrements", "false");
+      tokenFilterFactory("Type");
       fail("not supplying 'types' parameter should cause an IllegalArgumentException");
     } catch (IllegalArgumentException e) {
       // everything ok

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
 
 /** Test {@link KeepWordFilter} */
 public class TestKeepWordFilter extends BaseTokenStreamTestCase {
@@ -50,16 +49,6 @@ public class TestKeepWordFilter extends BaseTokenStreamTestCase {
     stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     stream = new KeepWordFilter(TEST_VERSION_CURRENT, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
     assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
-
-    // Test Stopwords
-    stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(Version.LUCENE_43, false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
-    assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
-
-    // Now force case
-    stream = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
-    stream = new KeepWordFilter(Version.LUCENE_43, false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
-    assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
   }
 
   /** blast some random strings through the analyzer */

@@ -17,25 +17,18 @@ package org.apache.lucene.analysis.miscellaneous;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.util.Version;
-
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+
 public class TestLengthFilter extends BaseTokenStreamTestCase {
 
-  public void testFilterNoPosIncr() throws Exception {
-    TokenStream stream = new MockTokenizer(
-        new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false);
-    LengthFilter filter = new LengthFilter(Version.LUCENE_43, false, stream, 2, 6);
-    assertTokenStreamContents(filter,
-      new String[]{"short", "ab", "foo"},
-      new int[]{1, 1, 1}
-    );
-  }
-
   public void testFilterWithPosIncr() throws Exception {
     TokenStream stream = new MockTokenizer(

@@ -22,29 +22,15 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.ClasspathResourceLoader;
-import org.apache.lucene.util.Version;
 
 public class TestLengthFilterFactory extends BaseTokenStreamFactoryTestCase {
 
-  public void test() throws Exception {
-    Reader reader = new StringReader("foo foobar super-duper-trooper");
-    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-    stream = tokenFilterFactory("Length",
-        Version.LUCENE_43, new ClasspathResourceLoader(getClass()),
-        "min", "4",
-        "max", "10",
-        "enablePositionIncrements", "false").create(stream);
-    assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
-  }
-
   public void testPositionIncrements() throws Exception {
     Reader reader = new StringReader("foo foobar super-duper-trooper");
     TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     stream = tokenFilterFactory("Length",
         "min", "4",
-        "max", "10",
-        "enablePositionIncrements", "true").create(stream);
+        "max", "10").create(stream);
     assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
   }
 

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Collection;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -28,7 +27,12 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -47,30 +51,9 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
                     new Token(ccc, 0, ccc.length, 11, 15),
                     new Token(whitespace, 0, whitespace.length, 16, 20),
                     new Token(empty, 0, empty.length, 21, 21));
-    ts = new TrimFilter(TEST_VERSION_CURRENT, ts, false);
+    ts = new TrimFilter(TEST_VERSION_CURRENT, ts);
 
     assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
-
-    a = " a".toCharArray();
-    b = "b ".toCharArray();
-    ccc = " c ".toCharArray();
-    whitespace = "   ".toCharArray();
-    ts = new IterTokenStream(
-            new Token(a, 0, a.length, 0, 2),
-            new Token(b, 0, b.length, 0, 2),
-            new Token(ccc, 0, ccc.length, 0, 3),
-            new Token(whitespace, 0, whitespace.length, 0, 3));
-    ts = new TrimFilter(Version.LUCENE_43, ts, true);
-
-    assertTokenStreamContents(ts,
-        new String[] { "a", "b", "c", "" },
-        new int[] { 1, 0, 1, 3 },
-        new int[] { 2, 1, 2, 3 },
-        null,
-        new int[] { 1, 1, 1, 1 },
-        null,
-        null,
-        false);
   }
 
   /**
@@ -92,10 +75,6 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       this.tokens = tokens;
     }
 
-    public IterTokenStream(Collection<Token> tokens) {
-      this(tokens.toArray(new Token[tokens.size()]));
-    }
-
     @Override
     public boolean incrementToken() throws IOException {
       if (index >= tokens.length)
@@ -121,20 +100,10 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(Version.LUCENE_43, tokenizer, true));
+        return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer));
      }
     };
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
-
-    Analyzer b = new Analyzer() {
-
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
-        return new TokenStreamComponents(tokenizer, new TrimFilter(TEST_VERSION_CURRENT, tokenizer, false));
-      }
-    };
-    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
   }
 
   public void testEmptyTerm() throws IOException {
@@ -142,9 +111,8 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new KeywordTokenizer(reader);
-        final boolean updateOffsets = random().nextBoolean();
-        final Version version = updateOffsets ? Version.LUCENE_43 : TEST_VERSION_CURRENT;
-        return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer, updateOffsets));
+        final Version version = TEST_VERSION_CURRENT;
+        return new TokenStreamComponents(tokenizer, new TrimFilter(version, tokenizer));
       }
     };
     checkOneTermReuse(a, "", "");

@@ -31,13 +31,6 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter {
   private final Set<String> stopTags;
   private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
 
-  /** @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. */
-  @Deprecated
-  public JapanesePartOfSpeechStopFilter(Version version, boolean enablePositionIncrements, TokenStream input, Set<String> stopTags) {
-    super(version, enablePositionIncrements, input);
-    this.stopTags = stopTags;
-  }
-
   /**
    * Create a new {@link JapanesePartOfSpeechStopFilter}.
    * @param version the Lucene match version

@@ -35,22 +35,19 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  * <analyzer>
  *   <tokenizer class="solr.JapaneseTokenizerFactory"/>
  *   <filter class="solr.JapanesePartOfSpeechStopFilterFactory"
- *           tags="stopTags.txt"
- *           enablePositionIncrements="true"/>
+ *           tags="stopTags.txt"/>
  * </analyzer>
  * </fieldType>
  * </pre>
  */
 public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
   private final String stopTagFiles;
-  private final boolean enablePositionIncrements;
   private Set<String> stopTags;
 
   /** Creates a new JapanesePartOfSpeechStopFilterFactory */
   public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
     super(args);
     stopTagFiles = get(args, "tags");
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -73,8 +70,7 @@ public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory im
   public TokenStream create(TokenStream stream) {
     // if stoptags is null, it means the file is empty
     if (stopTags != null) {
-      @SuppressWarnings("deprecation")
-      final TokenStream filter = new JapanesePartOfSpeechStopFilter(luceneMatchVersion, enablePositionIncrements, stream, stopTags);
+      final TokenStream filter = new JapanesePartOfSpeechStopFilter(luceneMatchVersion, stream, stopTags);
       return filter;
     } else {
       return stream;

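The Kuromoji filter follows the same pattern as the core filters: only the version/stream/stopTags constructor remains. A sketch; the single stop tag shown (a particle tag of the kind listed in Kuromoji's stoptags.txt) is illustrative:

import java.util.Collections;
import java.util.Set;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
import org.apache.lucene.util.Version;

public class PosStopSketch {
  public static TokenStream dropParticles(TokenStream in) {
    // Hypothetical single-tag set; real configurations load tags from a file.
    Set<String> stopTags = Collections.singleton("助詞-格助詞-一般");
    return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, in, stopTags);
  }
}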
@@ -165,13 +165,10 @@
         <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
         -->
         <!-- Case insensitive stop word removal.
-          enablePositionIncrements=true ensures that a 'gap' is left to
-          allow for accurate phrase queries.
         -->
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>

@@ -253,11 +253,9 @@
     -->
     <!--
       Case insensitive stop word removal.
-      add enablePositionIncrements=true in both the index and query
-      analyzers to leave a 'gap' for more accurate phrase queries.
     -->
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="1"
       catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" />
@@ -269,7 +267,7 @@
     <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
       ignoreCase="true" expand="true" />
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="0"
       catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
@@ -316,7 +314,7 @@
     <analyzer type="index">
     <tokenizer class="solr.MockTokenizerFactory" />
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="1"
       catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
@@ -327,7 +325,7 @@
     <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
       ignoreCase="true" expand="true" />
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="0"
       catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
@@ -346,7 +344,7 @@
     <analyzer type="index">
     <tokenizer class="solr.MockTokenizerFactory" />
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="1"
       catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
@@ -360,7 +358,7 @@
     <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
       ignoreCase="true" expand="true" />
     <filter class="solr.StopFilterFactory" ignoreCase="true"
-      words="stopwords.txt" enablePositionIncrements="true" />
+      words="stopwords.txt" />
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="0"
       catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />

@@ -253,8 +253,6 @@
     -->
     <!--
       Case insensitive stop word removal.
-      add enablePositionIncrements=true in both the index and query
-      analyzers to leave a 'gap' for more accurate phrase queries.
     -->
     <filter class="solr.WordDelimiterFilterFactory"
       generateWordParts="1" generateNumberParts="1" catenateWords="1"

@@ -111,7 +111,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -124,7 +123,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -141,7 +139,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -154,7 +151,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>

@@ -119,7 +119,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -132,7 +131,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -149,7 +147,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -162,7 +159,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>

@@ -112,7 +112,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -125,7 +124,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -142,7 +140,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -155,7 +152,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>

@@ -119,7 +119,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -132,7 +131,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -149,7 +147,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -162,7 +159,6 @@
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
                 words="stopwords.txt"
-                enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>

@@ -53,7 +53,7 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
         TokenStream filter = new PatternReplaceFilter(tokenizer,
             Pattern.compile("([^\\p{L}\\p{M}\\p{N}\\p{Cs}]*[\\p{L}\\p{M}\\p{N}\\p{Cs}\\_]+:)|([^\\p{L}\\p{M}\\p{N}\\p{Cs}])+"), " ", true);
         filter = new LowerCaseFilter(TEST_VERSION_CURRENT, filter);
-        filter = new TrimFilter(TEST_VERSION_CURRENT, filter, false);
+        filter = new TrimFilter(TEST_VERSION_CURRENT, filter);
         return new TokenStreamComponents(tokenizer, filter);
       }
     });

@@ -31,7 +31,6 @@ import org.apache.lucene.util.automaton.RegExp;
  */
 public class MockTokenFilterFactory extends TokenFilterFactory {
   final CharacterRunAutomaton filter;
-  final boolean enablePositionIncrements;
 
   /** Creates a new MockTokenizerFactory */
   public MockTokenFilterFactory(Map<String, String> args) {
@@ -54,7 +53,6 @@ public class MockTokenFilterFactory extends TokenFilterFactory {
       throw new IllegalArgumentException
           ("Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
     }
-    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }