From 6c0c3182181838a6cfe4f155addf8b8b7a83f584 Mon Sep 17 00:00:00 2001
From: Simon Willnauer NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}. NOTE: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}."\\W+"
; Divides text at non-letters (NOT Character.isLetter(c)) */
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
diff --git a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index 00ba204091f..b7e9622890a 100644
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -52,7 +52,7 @@ import java.util.Map;
*
NOTE: This class uses the same {@link Version} * dependent settings as {@link StandardAnalyzer}.
*/ -public class ThaiAnalyzer extends Analyzer { +public final class ThaiAnalyzer extends Analyzer { private final Version matchVersion; public ThaiAnalyzer(Version matchVersion) { - setOverridesTokenStreamMethod(ThaiAnalyzer.class); this.matchVersion = matchVersion; } @@ -59,13 +58,6 @@ public class ThaiAnalyzer extends Analyzer { @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - if (overridesTokenStreamMethod) { - // LUCENE-1678: force fallback to tokenStream() if we - // have been subclassed and that subclass overrides - // tokenStream but not reusableTokenStream - return tokenStream(fieldName, reader); - } - SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java index dd122688535..6c22bf797f7 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java @@ -21,13 +21,9 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; -import java.io.Reader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.util.Version; /** @@ -68,24 +64,6 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { checkReuse(a, "Tischen", "tisch"); } - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class GermanSubclassAnalyzer extends GermanAnalyzer { - public GermanSubclassAnalyzer(Version 
matchVersion) { - super(matchVersion); - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - } - - public void testLUCENE1678BWComp() throws Exception { - checkReuse(new GermanSubclassAnalyzer(Version.LUCENE_CURRENT), "Tischen", "Tischen"); - } - /* * Test that changes to the exclusion table are applied immediately * when using reusable token streams. diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java index aa737ec41b1..d6b54e2b1e2 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java @@ -18,12 +18,9 @@ package org.apache.lucene.analysis.nl; */ import java.io.File; -import java.io.Reader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.util.Version; /** @@ -127,27 +124,6 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase { checkOneTermReuse(a, "lichamelijkheden", "licham"); } - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class DutchSubclassAnalyzer extends DutchAnalyzer { - public DutchSubclassAnalyzer(Version matchVersion) { - super(matchVersion); - } - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - } - - public void testLUCENE1678BWComp() throws Exception { - Analyzer a = new DutchSubclassAnalyzer(Version.LUCENE_CURRENT); - checkOneTermReuse(a, "lichaamsziek", "lichaamsziek"); - checkOneTermReuse(a, "lichamelijk", "lichamelijk"); - checkOneTermReuse(a, "lichamelijke", "lichamelijke"); - 
checkOneTermReuse(a, "lichamelijkheden", "lichamelijkheden"); - } - /* * Test that changes to the exclusion table are applied immediately * when using reusable token streams. diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java index be2f4c9126b..6933dfa6cba 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java @@ -148,27 +148,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { } - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class QueryAutoStopWordSubclassAnalyzer extends QueryAutoStopWordAnalyzer { - public QueryAutoStopWordSubclassAnalyzer(Version matchVersion) { - super(matchVersion, new WhitespaceAnalyzer()); - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - } - - public void testLUCENE1678BWComp() throws Exception { - QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer(Version.LUCENE_CURRENT); - a.addStopWords(reader, "repetitiveField", 10); - int numHits = search(a, "repetitiveField:boring"); - assertFalse(numHits == 0); - } - /* * analyzer that does not support reuse * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even. 
diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java index 09d280e9f52..1991af0505d 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java @@ -213,28 +213,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { new int[] { 1, 0, 1, 0, 1, 0, 1 }); } - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class ShingleWrapperSubclassAnalyzer extends ShingleAnalyzerWrapper { - public ShingleWrapperSubclassAnalyzer() { - super(org.apache.lucene.util.Version.LUCENE_CURRENT); - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - }; - - public void testLUCENE1678BWComp() throws Exception { - Analyzer a = new ShingleWrapperSubclassAnalyzer(); - assertAnalyzesToReuse(a, "this is a test", - new String[] { "this", "is", "a", "test" }, - new int[] { 0, 5, 8, 10 }, - new int[] { 4, 7, 9, 14 }); - } - /* * analyzer that does not support reuse * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even. diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index 8c7334ac828..ba532d670de 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -17,12 +17,7 @@ package org.apache.lucene.analysis.th; * limitations under the License. 
*/ -import java.io.Reader; - import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.util.Version; /** @@ -124,22 +119,4 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); } - - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class ThaiSubclassAnalyzer extends ThaiAnalyzer { - public ThaiSubclassAnalyzer(Version matchVersion) { - super(matchVersion); - } - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - } - - public void testLUCENE1678BWComp() throws Exception { - ThaiSubclassAnalyzer a = new ThaiSubclassAnalyzer(Version.LUCENE_CURRENT); - assertAnalyzesToReuse(a, "การที่ได้ต้องแสดงว่างานดี", new String[] { "การที่ได้ต้องแสดงว่างานดี" }); - } } diff --git a/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java index 5341b45781c..135a132526d 100644 --- a/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java +++ b/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java @@ -58,7 +58,7 @@ import org.apache.lucene.util.Version; * supported anymore in such a case. 
* */ -public class SmartChineseAnalyzer extends Analyzer { +public final class SmartChineseAnalyzer extends Analyzer { private final Set<?> stopWords; diff --git a/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java b/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java index 387288aa38c..982428ac356 100644 --- a/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java +++ b/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java @@ -69,7 +69,7 @@ import java.io.IOException; * java.text.Collator over several languages. * */ -public class ICUCollationKeyAnalyzer extends Analyzer { +public final class ICUCollationKeyAnalyzer extends Analyzer { private Collator collator; public ICUCollationKeyAnalyzer(Collator collator) { diff --git a/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java b/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java index 960d64f833d..c0260fa79d8 100644 --- a/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java +++ b/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java @@ -40,7 +40,7 @@ import java.util.Set; * * */ -public class SnowballAnalyzer extends Analyzer { +public final class SnowballAnalyzer extends Analyzer { private String name; private Set<?> stopSet; private final Version matchVersion; @@ -48,7 +48,6 @@ public class SnowballAnalyzer extends Analyzer { /** Builds the named analyzer with no stop words. 
*/ public SnowballAnalyzer(Version matchVersion, String name) { this.name = name; - setOverridesTokenStreamMethod(SnowballAnalyzer.class); this.matchVersion = matchVersion; } @@ -80,7 +79,7 @@ public class SnowballAnalyzer extends Analyzer { private class SavedStreams { Tokenizer source; TokenStream result; - }; + } /** Returns a (possibly reused) {@link StandardTokenizer} filtered by a * {@link StandardFilter}, a {@link LowerCaseFilter}, @@ -88,13 +87,6 @@ public class SnowballAnalyzer extends Analyzer { @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - if (overridesTokenStreamMethod) { - // LUCENE-1678: force fallback to tokenStream() if we - // have been subclassed and that subclass overrides - // tokenStream but not reusableTokenStream - return tokenStream(fieldName, reader); - } - SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); diff --git a/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java b/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java index c0a7eed6917..bc960779ffd 100644 --- a/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java +++ b/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java @@ -17,11 +17,8 @@ package org.apache.lucene.analysis.snowball; * limitations under the License. 
*/ -import java.io.Reader; - import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.index.Payload; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; @@ -86,26 +83,6 @@ public class TestSnowball extends BaseTokenStreamTestCase { new String[]{"she", "abhor", "him"}); } - /** - * subclass that acts just like whitespace analyzer for testing - */ - private class SnowballSubclassAnalyzer extends SnowballAnalyzer { - public SnowballSubclassAnalyzer(String name) { - super(Version.LUCENE_CURRENT, name); - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new WhitespaceTokenizer(reader); - } - } - - public void testLUCENE1678BWComp() throws Exception { - Analyzer a = new SnowballSubclassAnalyzer("English"); - assertAnalyzesToReuse(a, "he abhorred accents", - new String[]{"he", "abhorred", "accents"}); - } - public void testFilterTokens() throws Exception { SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English"); TermAttribute termAtt = filter.getAttribute(TermAttribute.class);