LUCENE-8356: Remove StandardFilter from Solr schemas

This commit is contained in:
Alan Woodward 2018-06-15 08:45:55 +01:00
parent fce841534a
commit 0fbe3d257e
30 changed files with 44 additions and 157 deletions

View File

@ -178,11 +178,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase {
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
expected = expectThrows(IllegalArgumentException.class, () -> {
tokenFilterFactory("Standard", "bogusArg", "bogusValue");
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
expected = expectThrows(IllegalArgumentException.class, () -> {
tokenFilterFactory("Classic", "bogusArg", "bogusValue");
});

View File

@ -23,15 +23,15 @@ doc.body.tokenized=true
docs.dir=reuters-out
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
PorterStemFilter)
-AnalyzerFactory(name:porter2-stemmer,StandardTokenizer,
StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,
SnowballPorterFilter(language:English))
-AnalyzerFactory(name:no-stemmer,StandardTokenizer,
StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter)
EnglishPossessiveFilter,LowerCaseFilter,StopFilter)
{ "Rounds"
-NewAnalyzer(original-porter-stemmer)

View File

@ -35,7 +35,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>

View File

@ -79,7 +79,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -94,7 +93,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -140,13 +138,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -128,7 +128,6 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -143,7 +142,6 @@
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
@ -188,13 +186,11 @@
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">

View File

@ -21,7 +21,6 @@
<fieldType name="text40" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" luceneMatchVersion="LUCENE_40"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" luceneMatchVersion="5.0"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -30,7 +29,6 @@
<fieldType name="textDefault" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>

View File

@ -26,7 +26,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>

View File

@ -62,7 +62,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -77,7 +76,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -123,13 +121,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -200,7 +200,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -246,13 +245,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -38,7 +38,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -49,14 +48,12 @@
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>

View File

@ -130,7 +130,6 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -145,7 +144,6 @@
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
@ -190,13 +188,11 @@
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">

View File

@ -120,7 +120,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -130,7 +129,6 @@
<fieldtype name="text_payload_tv" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -145,12 +143,10 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
@ -195,13 +191,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -288,7 +288,6 @@ valued. -->
<fieldType name="text_length" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="32768"/>
</analyzer>

View File

@ -235,7 +235,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -292,13 +291,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -166,7 +166,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -212,13 +211,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -167,7 +167,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
@ -213,13 +212,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -27,7 +27,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>

View File

@ -285,22 +285,18 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertNotNull("Expecting the 'StandardTokenizer' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.standard.StandardFilter");
assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.en.PorterStemFilter");
assertNotNull("Expecting the 'PorterStemFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jump", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1,1}, null, false));
assertToken(tokenList.get(0), new TokenInfo("jump", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
indexResult = textResult.get("index");
assertEquals("The 'text' field has only a single value", 1, indexResult.size());
valueResult = (NamedList<List<NamedList>>) indexResult.get("The Fox Jumped Over The Dogs");
@ -313,37 +309,28 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.standard.StandardFilter");
assertNotNull("Expecting the 'StandardFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 6 tokens", 6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("The", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("Fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2}, null, false));
assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3}, null, false));
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 6 tokens", 6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2}, null, false));
assertToken(tokenList.get(2), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3,3}, null, false));
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2}, null, false));
assertToken(tokenList.get(2), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3}, null, false));
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3,3,3}, null, false));
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4,4}, null, false));
assertToken(tokenList.get(3), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6,6}, null, false));
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("jumped", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3,3}, null, false));
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
assertToken(tokenList.get(3), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.en.PorterStemFilter");
assertNotNull("Expecting the 'PorterStemFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 4 tokens", 4, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("jump", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3,3,3,3}, null, true));
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4,4,4}, null, false));
assertToken(tokenList.get(3), new TokenInfo("dog", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6,6,6}, null, false));
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("jump", null, "<ALPHANUM>", 8, 14, 3, new int[]{3,3,3,3}, null, true));
assertToken(tokenList.get(2), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4,4}, null, false));
assertToken(tokenList.get(3), new TokenInfo("dog", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6,6}, null, false));
}
}

View File

@ -210,8 +210,8 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.standard.StandardFilter");
assertNotNull("Expcting StandardFilter analysis breakdown", tokenList);
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 10);
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
@ -223,41 +223,28 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 10);
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
assertToken(tokenList.get(2), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, new int[]{3,3,3}, null, false));
assertToken(tokenList.get(3), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, new int[]{4,4,4}, null, true));
assertToken(tokenList.get(4), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, new int[]{5,5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, new int[]{6,6,6}, null, false));
assertToken(tokenList.get(6), new TokenInfo("the", null, "<ALPHANUM>", 30, 33, 7, new int[]{7,7,7}, null, false));
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 8);
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, new int[]{3,3,3}, null, false));
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, new int[]{4,4,4}, null, true));
assertToken(tokenList.get(3), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, new int[]{5,5,5}, null, false));
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, new int[]{6,6,6}, null, false));
assertToken(tokenList.get(5), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
assertToken(tokenList.get(7), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.en.PorterStemFilter");
assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 8);
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, new int[]{3,3,3,3}, null, false));
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, new int[]{4,4,4,4}, null, true));
assertToken(tokenList.get(3), new TokenInfo("jumped", null, "<ALPHANUM>", 18, 24, 5, new int[]{5,5,5,5}, null, false));
assertToken(tokenList.get(3), new TokenInfo("jump", null, "<ALPHANUM>", 18, 24, 5, new int[]{5,5,5,5}, null, false));
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, new int[]{6,6,6,6}, null, false));
assertToken(tokenList.get(5), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8,8}, null, false));
assertToken(tokenList.get(5), new TokenInfo("lazi", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8,8}, null, false));
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9,9}, null, true));
assertToken(tokenList.get(7), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.en.PorterStemFilter");
assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 8);
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2,2}, null, false));
assertToken(tokenList.get(1), new TokenInfo("red", null, "<ALPHANUM>", 10, 13, 3, new int[]{3,3,3,3,3}, null, false));
assertToken(tokenList.get(2), new TokenInfo("fox", null, "<ALPHANUM>", 14, 17, 4, new int[]{4,4,4,4,4}, null, true));
assertToken(tokenList.get(3), new TokenInfo("jump", null, "<ALPHANUM>", 18, 24, 5, new int[]{5,5,5,5,5}, null, false));
assertToken(tokenList.get(4), new TokenInfo("over", null, "<ALPHANUM>", 25, 29, 6, new int[]{6,6,6,6,6}, null, false));
assertToken(tokenList.get(5), new TokenInfo("lazi", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8,8,8}, null, false));
assertToken(tokenList.get(6), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9,9,9}, null, true));
assertToken(tokenList.get(7), new TokenInfo("dog", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10,10,10}, null, false));
assertToken(tokenList.get(7), new TokenInfo("dog", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10,10}, null, false));
NamedList<List<NamedList>> queryPart = textType.get("query");
assertNotNull("expecting a query token analysis for field type 'text'", queryPart);
@ -267,26 +254,21 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertEquals("Expecting StandardTokenizer to produce 2 tokens from '" + request.getQuery() + "'", 2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.standard.StandardFilter");
assertNotNull("Expcting StandardFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.en.PorterStemFilter");
assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2,2}, null, false));
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
NamedList<NamedList> nameTextType = fieldTypes.get("nametext");
assertNotNull("expecting result for field type 'nametext'", nameTextType);

View File

@ -82,7 +82,6 @@ public class TestFieldTypeResource extends SolrRestletTestBase {
"/response/lst[@name='fieldType']/str[@name='name'] = 'teststop'",
"/response/lst[@name='fieldType']/str[@name='class'] = 'solr.TextField'",
"/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LowerCaseTokenizerFactory'",
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StandardFilterFactory']",
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StopFilterFactory']",
"/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='words'][.='stopwords.txt']"
);

View File

@ -27,14 +27,13 @@ Because filters consume one `TokenStream` and produce a new `TokenStream`, they
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldType>
----
This example starts with Solr's standard tokenizer, which breaks the field's text into tokens. Those tokens then pass through Solr's standard filter, which removes dots from acronyms, and performs a few other common operations. All the tokens are then set to lowercase, which will facilitate case-insensitive matching at query time.
This example starts with Solr's standard tokenizer, which breaks the field's text into tokens. All the tokens are then set to lowercase, which will facilitate case-insensitive matching at query time.
The last filter in the above example is a stemmer filter that uses the Porter stemming algorithm. A stemmer is basically a set of mapping rules that maps the various forms of a word back to the base, or _stem_, word from which they derive. For example, in English the words "hugs", "hugging" and "hugged" are all forms of the stem word "hug". The stemmer will replace all of these terms with "hug", which is what will be indexed. This means that a query for "hug" will match the term "hugged", but not "huge".

View File

@ -40,7 +40,6 @@ For example:
<fieldType name="nametext" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>

View File

@ -1361,19 +1361,6 @@ Spanish stemmer, Spanish words:
*Out:* "cant", "cant"
== Standard Filter
This filter removes dots from acronyms and the substring "'s" from the end of tokens. This filter depends on the tokens being tagged with the appropriate term-type to recognize acronyms and words with apostrophes.
*Factory class:* `solr.StandardFilterFactory`
*Arguments:* None
[WARNING]
====
This filter is no longer operational in Solr when the `luceneMatchVersion` (in `solrconfig.xml`) is higher than "3.1".
====
== Stop Filter
This filter discards, or _stops_ analysis of, tokens that are on the given stop words list. A standard stop words list is included in the Solr `conf` directory, named `stopwords.txt`, which is appropriate for typical English language text.

View File

@ -85,7 +85,6 @@ To be used as the basis for a suggestion, the field must be stored. You may want
<fieldType class="solr.TextField" name="textSuggest" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

View File

@ -25,7 +25,7 @@ You configure the tokenizer for a text field type in `schema.xml` with a `<token
<fieldType name="text" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
----

View File

@ -130,7 +130,6 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -145,7 +144,6 @@
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
@ -190,13 +188,11 @@
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">

View File

@ -105,7 +105,6 @@
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -120,7 +119,6 @@
<fieldType name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
@ -165,13 +163,11 @@
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">

View File

@ -22,13 +22,11 @@
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords-fr.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords-fr.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
@ -36,13 +34,11 @@
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords-en.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords-en.txt"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>

View File

@ -126,7 +126,6 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
@ -141,7 +140,6 @@
<fieldtype name="teststop" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
@ -176,13 +174,11 @@
<fieldtype name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">

View File

@ -32,7 +32,6 @@ that just finds leaf <fieldType> and <field> nodes and there's no reason to brea
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>