From 0fbe3d257e173b78bd7f9681967351613a7254ab Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 15 Jun 2018 08:45:55 +0100 Subject: [PATCH] LUCENE-8356: Remove StandardFilter from Solr schemas --- .../standard/TestStandardFactories.java | 5 -- .../conf/english-porter-comparison.alg | 6 +- .../langid/solr/collection1/conf/schema.xml | 1 - .../conf/schema-copyfield-test.xml | 4 -- .../solr/collection1/conf/schema-hash.xml | 4 -- .../conf/schema-luceneMatchVersion.xml | 2 - .../conf/schema-not-required-unique-key.xml | 1 - .../conf/schema-required-fields.xml | 4 -- .../solr/collection1/conf/schema-rest.xml | 3 - .../collection1/conf/schema-spellchecker.xml | 3 - .../solr/collection1/conf/schema-sql.xml | 4 -- .../solr/collection1/conf/schema.xml | 6 -- .../solr/collection1/conf/schema11.xml | 1 - .../solr/collection1/conf/schema12.xml | 3 - .../solr/collection1/conf/schema15.xml | 3 - .../solr/collection1/conf/schemasurround.xml | 3 - .../test-files/solr/crazy-path-to-schema.xml | 1 - .../DocumentAnalysisRequestHandlerTest.java | 47 +++++--------- .../FieldAnalysisRequestHandlerTest.java | 62 +++++++------------ .../rest/schema/TestFieldTypeResource.java | 1 - solr/solr-ref-guide/src/about-filters.adoc | 3 +- solr/solr-ref-guide/src/analyzers.adoc | 1 - .../src/filter-descriptions.adoc | 13 ---- solr/solr-ref-guide/src/suggester.adoc | 1 - solr/solr-ref-guide/src/tokenizers.adoc | 2 +- .../solr/collection1/conf/schema-sql.xml | 4 -- .../solrj/solr/collection1/conf/schema.xml | 4 -- .../solr/configsets/shared/conf/schema.xml | 4 -- .../solr/configsets/streaming/conf/schema.xml | 4 -- .../solrj/solr/crazy-path-to-schema.xml | 1 - 30 files changed, 44 insertions(+), 157 deletions(-) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java index 85c77609adf..00bc7c64ad0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java @@ -178,11 +178,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { }); assertTrue(expected.getMessage().contains("Unknown parameters")); - expected = expectThrows(IllegalArgumentException.class, () -> { - tokenFilterFactory("Standard", "bogusArg", "bogusValue"); - }); - assertTrue(expected.getMessage().contains("Unknown parameters")); - expected = expectThrows(IllegalArgumentException.class, () -> { tokenFilterFactory("Classic", "bogusArg", "bogusValue"); }); diff --git a/lucene/benchmark/conf/english-porter-comparison.alg b/lucene/benchmark/conf/english-porter-comparison.alg index 682983c8a5d..e83f04a8dae 100644 --- a/lucene/benchmark/conf/english-porter-comparison.alg +++ b/lucene/benchmark/conf/english-porter-comparison.alg @@ -23,15 +23,15 @@ doc.body.tokenized=true docs.dir=reuters-out -AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer, - StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter, + EnglishPossessiveFilter,LowerCaseFilter,StopFilter, PorterStemFilter) -AnalyzerFactory(name:porter2-stemmer,StandardTokenizer, - StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter, + EnglishPossessiveFilter,LowerCaseFilter,StopFilter, SnowballPorterFilter(language:English)) -AnalyzerFactory(name:no-stemmer,StandardTokenizer, - StandardFilter,EnglishPossessiveFilter,LowerCaseFilter,StopFilter) + EnglishPossessiveFilter,LowerCaseFilter,StopFilter) { "Rounds" -NewAnalyzer(original-porter-stemmer) diff --git a/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/schema.xml b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/schema.xml index 5e52e99f224..fe96000359f 100644 --- a/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/schema.xml +++ b/solr/contrib/langid/src/test-files/langid/solr/collection1/conf/schema.xml @@ -35,7 +35,6 @@ - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml index adea3c19a64..f36751e7dbf 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml @@ -79,7 +79,6 @@ - @@ -94,7 +93,6 @@ - @@ -140,13 +138,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml index 09488356ff3..3e8aa151963 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml @@ -128,7 +128,6 @@ - @@ -143,7 +142,6 @@ - @@ -188,13 +186,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml index 406cfd23bb2..832aa504988 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml @@ -21,7 +21,6 @@ - @@ -30,7 +29,6 @@ - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml b/solr/core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml index ba8bbd5e801..484e29608af 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml @@ -26,7 +26,6 @@ - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml index 66add46c7b7..4210d5b73ac 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml @@ -62,7 +62,6 @@ - @@ -77,7 +76,6 @@ - @@ -123,13 +121,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml index 5e2ac212916..2a043564dcd 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml @@ -200,7 +200,6 @@ - @@ -246,13 +245,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spellchecker.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spellchecker.xml index 3b6174f581b..896f139a390 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-spellchecker.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-spellchecker.xml @@ -38,7 +38,6 @@ - @@ -49,14 +48,12 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml index 48bd7842d73..430d9663c0b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml @@ -130,7 +130,6 @@ - @@ -145,7 +144,6 @@ - @@ -190,13 +188,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index b8528477eb2..64d6fddc4c1 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -120,7 +120,6 @@ - @@ -130,7 +129,6 @@ - @@ -145,12 +143,10 @@ - - @@ -195,13 +191,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml index 6f38e7fb8ab..d3344a2f37f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml @@ -288,7 +288,6 @@ valued. --> - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml index de1b9980974..894767607c1 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml @@ -235,7 +235,6 @@ - @@ -292,13 +291,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml index cae5aeea9c1..80d19e9b2a8 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml @@ -166,7 +166,6 @@ - @@ -212,13 +211,11 @@ - - diff --git a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml index 49f95a01e7f..213acc7b032 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml @@ -167,7 +167,6 @@ - @@ -213,13 +212,11 @@ - - diff --git a/solr/core/src/test-files/solr/crazy-path-to-schema.xml b/solr/core/src/test-files/solr/crazy-path-to-schema.xml index 5b443ea3c52..fbfdfee9076 100644 --- a/solr/core/src/test-files/solr/crazy-path-to-schema.xml +++ b/solr/core/src/test-files/solr/crazy-path-to-schema.xml @@ -27,7 +27,6 @@ - diff --git a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java index 7f195263054..34fb186c802 100644 --- a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java @@ -285,22 +285,18 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe assertNotNull("Expecting the 'StandardTokenizer' to be applied on the query for the 'text' field", tokenList); assertEquals("Query has only one token", 1, tokenList.size()); assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "", 0, 7, 1, new int[]{1}, null, false)); - tokenList = (List) queryResult.get("org.apache.lucene.analysis.standard.StandardFilter"); - assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList); - assertEquals("Query has only one token", 1, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "", 0, 7, 1, new int[]{1,1}, null, false)); tokenList = (List) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter"); assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList); assertEquals("Query has only one token", 1, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("jumping", null, "", 0, 7, 1, new int[]{1,1,1}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("jumping", null, "", 0, 7, 1, new int[]{1,1}, null, false)); tokenList = (List) queryResult.get("org.apache.lucene.analysis.core.StopFilter"); assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList); assertEquals("Query has only one token", 1, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("jumping", null, "", 0, 7, 1, new int[]{1,1,1,1}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("jumping", null, "", 0, 7, 1, new int[]{1,1,1}, null, false)); tokenList = (List) queryResult.get("org.apache.lucene.analysis.en.PorterStemFilter"); assertNotNull("Expecting the 'PorterStemFilter' to be applied on the query for the 'text' field", tokenList); assertEquals("Query has only one token", 1, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("jump", null, "", 0, 7, 1, new int[]{1,1,1,1,1}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("jump", null, "", 0, 7, 1, new int[]{1,1,1,1}, null, false)); indexResult = textResult.get("index"); assertEquals("The 'text' field has only a single value", 1, indexResult.size()); valueResult = (NamedList>) indexResult.get("The Fox Jumped Over The Dogs"); @@ -313,37 +309,28 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe assertToken(tokenList.get(3), new TokenInfo("Over", null, "", 15, 19, 4, new int[]{4}, null, false)); assertToken(tokenList.get(4), new TokenInfo("The", null, "", 20, 23, 5, new int[]{5}, null, false)); assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "", 24, 28, 6, new int[]{6}, null, false)); - tokenList = valueResult.get("org.apache.lucene.analysis.standard.StandardFilter"); - assertNotNull("Expecting the 'StandardFilter' to be applied on the index for the 'text' field", tokenList); - assertEquals("Expecting 6 tokens", 6, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("The", null, "", 0, 3, 1, new int[]{1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("Fox", null, "", 4, 7, 2, new int[]{2,2}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("Jumped", null, "", 8, 14, 3, new int[]{3,3}, null, false)); - assertToken(tokenList.get(3), new TokenInfo("Over", null, "", 15, 19, 4, new int[]{4,4}, null, false)); - assertToken(tokenList.get(4), new TokenInfo("The", null, "", 20, 23, 5, new int[]{5,5}, null, false)); - assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "", 24, 28, 6, new int[]{6,6}, null, false)); tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter"); assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList); assertEquals("Expecting 6 tokens", 6, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("the", null, "", 0, 3, 1, new int[]{1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2,2}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("jumped", null, "", 8, 14, 3, new int[]{3,3,3}, null, false)); - assertToken(tokenList.get(3), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4,4}, null, false)); - assertToken(tokenList.get(4), new TokenInfo("the", null, "", 20, 23, 5, new int[]{5,5,5}, null, false)); - assertToken(tokenList.get(5), new TokenInfo("dogs", null, "", 24, 28, 6, new int[]{6,6,6}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("the", null, "", 0, 3, 1, new int[]{1,1}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2}, null, false)); + assertToken(tokenList.get(2), new TokenInfo("jumped", null, "", 8, 14, 3, new int[]{3,3}, null, false)); + assertToken(tokenList.get(3), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4}, null, false)); + assertToken(tokenList.get(4), new TokenInfo("the", null, "", 20, 23, 5, new int[]{5,5}, null, false)); + assertToken(tokenList.get(5), new TokenInfo("dogs", null, "", 24, 28, 6, new int[]{6,6}, null, false)); tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter"); assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList); assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2,2,2}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("jumped", null, "", 8, 14, 3, new int[]{3,3,3,3}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4,4,4}, null, false)); - assertToken(tokenList.get(3), new TokenInfo("dogs", null, "", 24, 28, 6, new int[]{6,6,6,6}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2,2}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("jumped", null, "", 8, 14, 3, new int[]{3,3,3}, null, false)); + assertToken(tokenList.get(2), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4,4}, null, false)); + assertToken(tokenList.get(3), new TokenInfo("dogs", null, "", 24, 28, 6, new int[]{6,6,6}, null, false)); tokenList = valueResult.get("org.apache.lucene.analysis.en.PorterStemFilter"); assertNotNull("Expecting the 'PorterStemFilter' to be applied on the index for the 'text' field", tokenList); assertEquals("Expecting 4 tokens", 4, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2,2,2,2}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("jump", null, "", 8, 14, 3, new int[]{3,3,3,3,3}, null, true)); - assertToken(tokenList.get(2), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4,4,4,4}, null, false)); - assertToken(tokenList.get(3), new TokenInfo("dog", null, "", 24, 28, 6, new int[]{6,6,6,6,6}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 4, 7, 2, new int[]{2,2,2,2}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("jump", null, "", 8, 14, 3, new int[]{3,3,3,3}, null, true)); + assertToken(tokenList.get(2), new TokenInfo("over", null, "", 15, 19, 4, new int[]{4,4,4,4}, null, false)); + assertToken(tokenList.get(3), new TokenInfo("dog", null, "", 24, 28, 6, new int[]{6,6,6,6}, null, false)); } } diff --git a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java index 260dc4a86a5..ef7de391e78 100644 --- a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java @@ -210,8 +210,8 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB assertToken(tokenList.get(7), new TokenInfo("lazy", null, "", 34, 38, 8, new int[]{8}, null, false)); assertToken(tokenList.get(8), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9}, null, true)); assertToken(tokenList.get(9), new TokenInfo("dogs", null, "", 45, 49, 10, new int[]{10}, null, false)); - tokenList = indexPart.get("org.apache.lucene.analysis.standard.StandardFilter"); - assertNotNull("Expcting StandardFilter analysis breakdown", tokenList); + tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter"); + assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList); assertEquals(tokenList.size(), 10); assertToken(tokenList.get(0), new TokenInfo("the", null, "", 0, 3, 1, new int[]{1,1}, null, false)); assertToken(tokenList.get(1), new TokenInfo("quick", null, "", 4, 9, 2, new int[]{2,2}, null, false)); @@ -223,41 +223,28 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB assertToken(tokenList.get(7), new TokenInfo("lazy", null, "", 34, 38, 8, new int[]{8,8}, null, false)); assertToken(tokenList.get(8), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9,9}, null, true)); assertToken(tokenList.get(9), new TokenInfo("dogs", null, "", 45, 49, 10, new int[]{10,10}, null, false)); - tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter"); - assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList); - assertEquals(tokenList.size(), 10); - assertToken(tokenList.get(0), new TokenInfo("the", null, "", 0, 3, 1, new int[]{1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("quick", null, "", 4, 9, 2, new int[]{2,2,2}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("red", null, "", 10, 13, 3, new int[]{3,3,3}, null, false)); - assertToken(tokenList.get(3), new TokenInfo("fox", null, "", 14, 17, 4, new int[]{4,4,4}, null, true)); - assertToken(tokenList.get(4), new TokenInfo("jumped", null, "", 18, 24, 5, new int[]{5,5,5}, null, false)); - assertToken(tokenList.get(5), new TokenInfo("over", null, "", 25, 29, 6, new int[]{6,6,6}, null, false)); - assertToken(tokenList.get(6), new TokenInfo("the", null, "", 30, 33, 7, new int[]{7,7,7}, null, false)); - assertToken(tokenList.get(7), new TokenInfo("lazy", null, "", 34, 38, 8, new int[]{8,8,8}, null, false)); - assertToken(tokenList.get(8), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9,9,9}, null, true)); - assertToken(tokenList.get(9), new TokenInfo("dogs", null, "", 45, 49, 10, new int[]{10,10,10}, null, false)); tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter"); assertNotNull("Expcting StopFilter analysis breakdown", tokenList); assertEquals(tokenList.size(), 8); + assertToken(tokenList.get(0), new TokenInfo("quick", null, "", 4, 9, 2, new int[]{2,2,2}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("red", null, "", 10, 13, 3, new int[]{3,3,3}, null, false)); + assertToken(tokenList.get(2), new TokenInfo("fox", null, "", 14, 17, 4, new int[]{4,4,4}, null, true)); + assertToken(tokenList.get(3), new TokenInfo("jumped", null, "", 18, 24, 5, new int[]{5,5,5}, null, false)); + assertToken(tokenList.get(4), new TokenInfo("over", null, "", 25, 29, 6, new int[]{6,6,6}, null, false)); + assertToken(tokenList.get(5), new TokenInfo("lazy", null, "", 34, 38, 8, new int[]{8,8,8}, null, false)); + assertToken(tokenList.get(6), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9,9,9}, null, true)); + assertToken(tokenList.get(7), new TokenInfo("dogs", null, "", 45, 49, 10, new int[]{10,10,10}, null, false)); + tokenList = indexPart.get("org.apache.lucene.analysis.en.PorterStemFilter"); + assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList); + assertEquals(tokenList.size(), 8); assertToken(tokenList.get(0), new TokenInfo("quick", null, "", 4, 9, 2, new int[]{2,2,2,2}, null, false)); assertToken(tokenList.get(1), new TokenInfo("red", null, "", 10, 13, 3, new int[]{3,3,3,3}, null, false)); assertToken(tokenList.get(2), new TokenInfo("fox", null, "", 14, 17, 4, new int[]{4,4,4,4}, null, true)); - assertToken(tokenList.get(3), new TokenInfo("jumped", null, "", 18, 24, 5, new int[]{5,5,5,5}, null, false)); + assertToken(tokenList.get(3), new TokenInfo("jump", null, "", 18, 24, 5, new int[]{5,5,5,5}, null, false)); assertToken(tokenList.get(4), new TokenInfo("over", null, "", 25, 29, 6, new int[]{6,6,6,6}, null, false)); - assertToken(tokenList.get(5), new TokenInfo("lazy", null, "", 34, 38, 8, new int[]{8,8,8,8}, null, false)); + assertToken(tokenList.get(5), new TokenInfo("lazi", null, "", 34, 38, 8, new int[]{8,8,8,8}, null, false)); assertToken(tokenList.get(6), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9,9,9,9}, null, true)); - assertToken(tokenList.get(7), new TokenInfo("dogs", null, "", 45, 49, 10, new int[]{10,10,10,10}, null, false)); - tokenList = indexPart.get("org.apache.lucene.analysis.en.PorterStemFilter"); - assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList); - assertEquals(tokenList.size(), 8); - assertToken(tokenList.get(0), new TokenInfo("quick", null, "", 4, 9, 2, new int[]{2,2,2,2,2}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("red", null, "", 10, 13, 3, new int[]{3,3,3,3,3}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("fox", null, "", 14, 17, 4, new int[]{4,4,4,4,4}, null, true)); - assertToken(tokenList.get(3), new TokenInfo("jump", null, "", 18, 24, 5, new int[]{5,5,5,5,5}, null, false)); - assertToken(tokenList.get(4), new TokenInfo("over", null, "", 25, 29, 6, new int[]{6,6,6,6,6}, null, false)); - assertToken(tokenList.get(5), new TokenInfo("lazi", null, "", 34, 38, 8, new int[]{8,8,8,8,8}, null, false)); - assertToken(tokenList.get(6), new TokenInfo("brown", null, "", 39, 44, 9, new int[]{9,9,9,9,9}, null, true)); - assertToken(tokenList.get(7), new TokenInfo("dog", null, "", 45, 49, 10, new int[]{10,10,10,10,10}, null, false)); + assertToken(tokenList.get(7), new TokenInfo("dog", null, "", 45, 49, 10, new int[]{10,10,10,10}, null, false)); NamedList> queryPart = textType.get("query"); assertNotNull("expecting a query token analysis for field type 'text'", queryPart); @@ -267,26 +254,21 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB assertEquals("Expecting StandardTokenizer to produce 2 tokens from '" + request.getQuery() + "'", 2, tokenList.size()); assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1}, null, false)); assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2}, null, false)); - tokenList = queryPart.get("org.apache.lucene.analysis.standard.StandardFilter"); - assertNotNull("Expcting StandardFilter analysis breakdown", tokenList); - assertEquals(2, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2}, null, false)); tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter"); assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList); assertEquals(2, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2,2}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2}, null, false)); tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter"); assertNotNull("Expcting StopFilter analysis breakdown", tokenList); assertEquals(2, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2,2,2}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1,1}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2,2}, null, false)); tokenList = queryPart.get("org.apache.lucene.analysis.en.PorterStemFilter"); assertNotNull("Expcting PorterStemFilter analysis breakdown", tokenList); assertEquals(2, tokenList.size()); - assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1,1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2,2,2,2}, null, false)); + assertToken(tokenList.get(0), new TokenInfo("fox", null, "", 0, 3, 1, new int[]{1,1,1,1}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("brown", null, "", 4, 9, 2, new int[]{2,2,2,2}, null, false)); NamedList nameTextType = fieldTypes.get("nametext"); assertNotNull("expecting result for field type 'nametext'", nameTextType); diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java index 4fd94c92a37..ea19af0d2ed 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java @@ -82,7 +82,6 @@ public class TestFieldTypeResource extends SolrRestletTestBase { "/response/lst[@name='fieldType']/str[@name='name'] = 'teststop'", "/response/lst[@name='fieldType']/str[@name='class'] = 'solr.TextField'", "/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LowerCaseTokenizerFactory'", - "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StandardFilterFactory']", "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StopFilterFactory']", "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='words'][.='stopwords.txt']" ); diff --git a/solr/solr-ref-guide/src/about-filters.adoc b/solr/solr-ref-guide/src/about-filters.adoc index 8bf993d5648..dbb10a6fe0a 100644 --- a/solr/solr-ref-guide/src/about-filters.adoc +++ b/solr/solr-ref-guide/src/about-filters.adoc @@ -27,14 +27,13 @@ Because filters consume one `TokenStream` and produce a new `TokenStream`, they - ---- -This example starts with Solr's standard tokenizer, which breaks the field's text into tokens. Those tokens then pass through Solr's standard filter, which removes dots from acronyms, and performs a few other common operations. All the tokens are then set to lowercase, which will facilitate case-insensitive matching at query time. +This example starts with Solr's standard tokenizer, which breaks the field's text into tokens. All the tokens are then set to lowercase, which will facilitate case-insensitive matching at query time. The last filter in the above example is a stemmer filter that uses the Porter stemming algorithm. A stemmer is basically a set of mapping rules that maps the various forms of a word back to the base, or _stem_, word from which they derive. For example, in English the words "hugs", "hugging" and "hugged" are all forms of the stem word "hug". The stemmer will replace all of these terms with "hug", which is what will be indexed. This means that a query for "hug" will match the term "hugged", but not "huge". diff --git a/solr/solr-ref-guide/src/analyzers.adoc b/solr/solr-ref-guide/src/analyzers.adoc index 2edfe9c72fd..6e641655853 100644 --- a/solr/solr-ref-guide/src/analyzers.adoc +++ b/solr/solr-ref-guide/src/analyzers.adoc @@ -40,7 +40,6 @@ For example: - diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc index a83652e84f6..95e83b6ade5 100644 --- a/solr/solr-ref-guide/src/filter-descriptions.adoc +++ b/solr/solr-ref-guide/src/filter-descriptions.adoc @@ -1361,19 +1361,6 @@ Spanish stemmer, Spanish words: *Out:* "cant", "cant" -== Standard Filter - -This filter removes dots from acronyms and the substring "'s" from the end of tokens. This filter depends on the tokens being tagged with the appropriate term-type to recognize acronyms and words with apostrophes. - -*Factory class:* `solr.StandardFilterFactory` - -*Arguments:* None - -[WARNING] -==== -This filter is no longer operational in Solr when the `luceneMatchVersion` (in `solrconfig.xml`) is higher than "3.1". -==== - == Stop Filter This filter discards, or _stops_ analysis of, tokens that are on the given stop words list. A standard stop words list is included in the Solr `conf` directory, named `stopwords.txt`, which is appropriate for typical English language text. diff --git a/solr/solr-ref-guide/src/suggester.adoc b/solr/solr-ref-guide/src/suggester.adoc index 0f9e12e040a..7b141b48a36 100644 --- a/solr/solr-ref-guide/src/suggester.adoc +++ b/solr/solr-ref-guide/src/suggester.adoc @@ -85,7 +85,6 @@ To be used as the basis for a suggestion, the field must be stored. You may want - diff --git a/solr/solr-ref-guide/src/tokenizers.adoc b/solr/solr-ref-guide/src/tokenizers.adoc index b3352399783..82e730d5164 100644 --- a/solr/solr-ref-guide/src/tokenizers.adoc +++ b/solr/solr-ref-guide/src/tokenizers.adoc @@ -25,7 +25,7 @@ You configure the tokenizer for a text field type in `schema.xml` with a ` - + ---- diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml index 8cdc1f074ca..3a1f32815c8 100644 --- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml +++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml @@ -130,7 +130,6 @@ - @@ -145,7 +144,6 @@ - @@ -190,13 +188,11 @@ - - diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml index 94fc2ede5c0..02b505378e9 100644 --- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml @@ -105,7 +105,6 @@ - @@ -120,7 +119,6 @@ - @@ -165,13 +163,11 @@ - - diff --git a/solr/solrj/src/test-files/solrj/solr/configsets/shared/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/configsets/shared/conf/schema.xml index a3559b9749d..c46c8cbd935 100644 --- a/solr/solrj/src/test-files/solrj/solr/configsets/shared/conf/schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/configsets/shared/conf/schema.xml @@ -22,13 +22,11 @@ - - @@ -36,13 +34,11 @@ - - diff --git a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml index 1a3b96f676e..aa96296b580 100644 --- a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml @@ -126,7 +126,6 @@ - @@ -141,7 +140,6 @@ - @@ -176,13 +174,11 @@ - - diff --git a/solr/solrj/src/test-files/solrj/solr/crazy-path-to-schema.xml b/solr/solrj/src/test-files/solrj/solr/crazy-path-to-schema.xml index 6954fc643f7..9d0cfa23f90 100644 --- a/solr/solrj/src/test-files/solrj/solr/crazy-path-to-schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/crazy-path-to-schema.xml @@ -32,7 +32,6 @@ that just finds leaf and nodes and there's no reason to brea -