diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java index 0c25e5d0dc7..547d7ca86ad 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicFilters.java @@ -34,8 +34,7 @@ public class TestArabicFilters extends BaseTokenStreamFactoryTestCase { */ public void testNormalizer() throws Exception { Reader reader = new StringReader("الذين مَلكت أيمانكم"); - Tokenizer tokenizer = tokenizerFactory("Standard").create(); - tokenizer.setReader(reader); + Tokenizer tokenizer = whitespaceMockTokenizer(reader); TokenStream stream = tokenFilterFactory("ArabicNormalization").create(tokenizer); assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"}); } @@ -45,8 +44,7 @@ public class TestArabicFilters extends BaseTokenStreamFactoryTestCase { */ public void testStemmer() throws Exception { Reader reader = new StringReader("الذين مَلكت أيمانكم"); - Tokenizer tokenizer = tokenizerFactory("Standard").create(); - tokenizer.setReader(reader); + Tokenizer tokenizer = whitespaceMockTokenizer(reader); TokenStream stream = tokenFilterFactory("ArabicNormalization").create(tokenizer); stream = tokenFilterFactory("ArabicStem").create(stream); assertTokenStreamContents(stream, new String[] {"ذين", "ملكت", "ايمانكم"}); @@ -57,8 +55,7 @@ public class TestArabicFilters extends BaseTokenStreamFactoryTestCase { */ public void testPersianCharFilter() throws Exception { Reader reader = charFilterFactory("Persian").create(new StringReader("می‌خورد")); - Tokenizer tokenizer = tokenizerFactory("Standard").create(); - tokenizer.setReader(reader); + Tokenizer tokenizer = whitespaceMockTokenizer(reader); assertTokenStreamContents(tokenizer, new String[] { "می", "خورد" }); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniNormalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniNormalizationFilter.java index a5946ada773..a91b0d5eba9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniNormalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniNormalizationFilter.java @@ -22,6 +22,7 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; @@ -32,7 +33,7 @@ public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new KeywordTokenizer(); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer)); } }; @@ -87,6 +88,13 @@ public class TestSoraniNormalizationFilter extends BaseTokenStreamTestCase { } public void testEmptyTerm() throws IOException { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer)); + } + }; checkOneTerm(a, "", ""); } } diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index f3f5c72e9ee..6385ff74580 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -180,7 +180,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception { CharArraySet dict = makeDictionary("ab", "cd", "ef"); - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("abcdef")); DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, @@ -200,7 +200,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { public void testWordComponentWithLessThanMinimumLength() throws Exception { CharArraySet dict = makeDictionary("abc", "d", "efg"); - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("abcdefg")); DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, @@ -222,7 +222,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung"); - Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + MockTokenizer wsTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); + wsTokenizer.setEnableChecks(false); // we will reset in a strange place wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz")); DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, @@ -246,7 +247,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { public void testRetainMockAttribute() throws Exception { CharArraySet dict = makeDictionary("abc", "d", "efg"); - Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); tokenizer.setReader(new StringReader("abcdefg")); TokenStream stream = new MockRetainAttributeFilter(tokenizer); stream = new DictionaryCompoundWordTokenFilter( diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java index 89667f7271d..7c4334b7277 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java @@ -22,6 +22,7 @@ import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockReaderWrapper; import org.apache.lucene.analysis.TokenStream; @@ -29,7 +30,6 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.BasicOperations; @@ -44,7 +44,7 @@ import org.apache.lucene.util.automaton.Transition; * Any tests here need to probably consider unicode version of the JRE (it could * cause false fails). */ -public class TestDuelingAnalyzers extends LuceneTestCase { +public class TestDuelingAnalyzers extends BaseTokenStreamTestCase { private CharacterRunAutomaton jvmLetter; @Override @@ -71,7 +71,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -91,7 +91,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -109,7 +109,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -128,7 +128,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -146,7 +146,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -165,7 +165,7 @@ public class TestDuelingAnalyzers extends LuceneTestCase { Analyzer right = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer, tokenizer); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java index f1708ea37a0..b597eb043ec 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java @@ 
-179,7 +179,7 @@ public class TestFactories extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tf = tokenizer.create(); + Tokenizer tf = tokenizer.create(newAttributeFactory()); if (tokenfilter != null) { return new TokenStreamComponents(tf, tokenfilter.create(tf)); } else { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index 06d80a2995d..f3cc4e03b07 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -355,6 +355,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase { return TEST_VERSION_CURRENT; } }); + put(AttributeFactory.class, new ArgProducer() { + @Override public Object create(Random random) { + return newAttributeFactory(random); + } + }); put(Set.class, new ArgProducer() { @Override public Object create(Random random) { // TypeTokenFilter @@ -582,10 +587,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase { Object[] args = new Object[paramTypes.length]; for (int i = 0; i < args.length; i++) { Class<?> paramType = paramTypes[i]; - if (paramType == AttributeFactory.class) { - // TODO: maybe the collator one...??? - args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; - } else if (paramType == AttributeSource.class) { + if (paramType == AttributeSource.class) { // TODO: args[i] = new AttributeSource(); // this is currently too scary to deal with! args[i] = null; // force IAE diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java index 4fbf37e7455..bbb656c9794 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java @@ -50,7 +50,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -298,7 +298,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase { new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer); return new TokenStreamComponents(tokenizer, tokenStream); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java index 660c85d8559..b15c9628afc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestTypeTokenFilter.java @@ -36,7 +36,7 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { public void testTypeFilter() throws IOException { StringReader reader = new
StringReader("121 is palindrome, while 123 is not"); Set<String> stopTypes = asSet("<NUM>"); - final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT); + final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); input.setReader(reader); TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes); assertTokenStreamContents(stream, new String[]{"is", "palindrome", "while", "is", "not"}); @@ -85,7 +85,7 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase { public void testTypeFilterWhitelist() throws IOException { StringReader reader = new StringReader("121 is palindrome, while 123 is not"); Set<String> stopTypes = Collections.singleton("<NUM>"); - final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT); + final StandardTokenizer input = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); input.setReader(reader); TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, input, stopTypes, true); assertTokenStreamContents(stream, new String[]{"121", "123"}); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index 1d924e8ff87..865e6c6b46f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -47,7 +47,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(input)); BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -56,7 +56,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; @@ -103,7 +103,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { private Analyzer urlAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // Tokenize arbitrary length URLs TokenFilter filter = new URLFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); @@ -113,7 +113,7 @@ private Analyzer emailAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); TokenFilter filter = new EmailFilter(tokenizer); return new TokenStreamComponents(tokenizer,
filter); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java index cbe35bc5594..3132787f292 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java @@ -42,7 +42,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer t = new KeywordTokenizer(); + Tokenizer t = new MockTokenizer(MockTokenizer.KEYWORD, false); return new TokenStreamComponents(t, new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); } @@ -54,12 +54,6 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase { vocOut.close(); } - // LUCENE-3043: we use keywordtokenizer in this test, - // so ensure the stemmer does not crash on zero-length strings. - public void testEmpty() throws Exception { - assertAnalyzesTo(analyzer, "", new String[] { "" }); - } - public void testKeyword() throws IOException { final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false); Analyzer a = new Analyzer() { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java index 1359a5ccbdf..7d2f6196d47 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java @@ -24,6 +24,7 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; @@ -37,9 +38,8 @@ public class TestGalicianStemFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT); - TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); - return new TokenStreamComponents(source, new GalicianStemFilter(result)); + Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false); + return new TokenStreamComponents(source, new GalicianStemFilter(source)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java index 242f414df7e..0eb1e682631 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiFilters.java @@ -34,8 +34,7 @@ public class TestHindiFilters extends BaseTokenStreamFactoryTestCase { */ public void testIndicNormalizer() throws Exception { Reader reader = new StringReader("ত্‍ अाैर"); - TokenStream stream = tokenizerFactory("Standard").create(); - ((Tokenizer)stream).setReader(reader); + TokenStream stream = whitespaceMockTokenizer(reader); stream = tokenFilterFactory("IndicNormalization").create(stream); 
assertTokenStreamContents(stream, new String[] { "ৎ", "और" }); } @@ -45,8 +44,7 @@ public class TestHindiFilters extends BaseTokenStreamFactoryTestCase { */ public void testHindiNormalizer() throws Exception { Reader reader = new StringReader("क़िताब"); - TokenStream stream = tokenizerFactory("Standard").create(); - ((Tokenizer)stream).setReader(reader); + TokenStream stream = whitespaceMockTokenizer(reader); stream = tokenFilterFactory("IndicNormalization").create(stream); stream = tokenFilterFactory("HindiNormalization").create(stream); assertTokenStreamContents(stream, new String[] {"किताब"}); @@ -57,8 +55,7 @@ public class TestHindiFilters extends BaseTokenStreamFactoryTestCase { */ public void testStemmer() throws Exception { Reader reader = new StringReader("किताबें"); - TokenStream stream = tokenizerFactory("Standard").create(); - ((Tokenizer)stream).setReader(reader); + TokenStream stream = whitespaceMockTokenizer(reader); stream = tokenFilterFactory("IndicNormalization").create(stream); stream = tokenFilterFactory("HindiNormalization").create(stream); stream = tokenFilterFactory("HindiStem").create(stream); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java index 2a5b78cfc6c..24a2c7449ab 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java @@ -22,6 +22,7 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; @@ -33,7 +34,7 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new KeywordTokenizer(); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer)); } }; @@ -114,7 +115,7 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase { Analyzer b = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new KeywordTokenizer(); + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java index 679295b1c46..0f6510e948e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java @@ -37,7 +37,8 @@ public class TestSingleTokenTokenFilter extends LuceneTestCase { assertEquals(token, tokenAtt); assertFalse(ts.incrementToken()); - token = new Token("hallo", 10, 20, "someType"); + token = new Token("hallo", 10, 20); + token.setType("someType"); ts.setToken(token); ts.reset(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java index 7d27b05b6f3..c770de15bbf 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java @@ -46,11 +46,11 @@ public class TestTrimFilter extends BaseTokenStreamTestCase { char[] whitespace = " ".toCharArray(); char[] empty = "".toCharArray(); - TokenStream ts = new IterTokenStream(new Token(a, 0, a.length, 1, 5), - new Token(b, 0, b.length, 6, 10), - new Token(ccc, 0, ccc.length, 11, 15), - new Token(whitespace, 0, whitespace.length, 16, 20), - new Token(empty, 0, empty.length, 21, 21)); + TokenStream ts = new IterTokenStream(new Token(new String(a, 0, a.length), 1, 5), + new Token(new String(b, 0, b.length), 6, 10), + new Token(new String(ccc, 0, ccc.length), 11, 15), + new Token(new String(whitespace, 0, whitespace.length), 16, 20), + new Token(new String(empty, 0, empty.length), 21, 21)); ts = new TrimFilter(TEST_VERSION_CURRENT, ts); assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""}); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java index b76d668459a..787c9b9ab92 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java @@ -355,7 +355,8 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords)); } }; - checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); + // TODO: properly support positionLengthAttribute + checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false); } } @@ -379,7 +380,8 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase { return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords)); } }; - checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192); + // TODO: properly support positionLengthAttribute + checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192, false, false); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java index 3e55f8e2191..69a9e8996d3 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java @@ -27,11 +27,14 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.charfilter.MappingCharFilter; import org.apache.lucene.analysis.charfilter.NormalizeCharMap; +import static org.apache.lucene.analysis.path.PathHierarchyTokenizer.DEFAULT_DELIMITER; +import static org.apache.lucene.analysis.path.PathHierarchyTokenizer.DEFAULT_SKIP; + public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testBasic() throws Exception { String path = "/a/b/c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer(); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), 
DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/a", "/a/b", "/a/b/c"}, @@ -43,7 +46,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testEndOfDelimiter() throws Exception { String path = "/a/b/c/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer(); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader( new StringReader(path) ); assertTokenStreamContents(t, new String[]{"/a", "/a/b", "/a/b/c", "/a/b/c/"}, @@ -55,7 +58,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfChar() throws Exception { String path = "a/b/c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer(); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader( new StringReader(path) ); assertTokenStreamContents(t, new String[]{"a", "a/b", "a/b/c"}, @@ -67,7 +70,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharEndOfDelimiter() throws Exception { String path = "a/b/c/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader( new StringReader(path) ); assertTokenStreamContents(t, new String[]{"a", "a/b", "a/b/c", "a/b/c/"}, @@ -79,7 +82,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimiter() throws Exception { String path = "/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader( new StringReader(path) ); assertTokenStreamContents(t, new String[]{"/"}, @@ -91,7 +94,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimiters() throws Exception { String path = "//"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/", "//"}, @@ -103,7 +106,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testReplace() throws Exception { String path = "/a/b/c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( '/', '\\' ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), '/', '\\', DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"\\a", "\\a\\b", "\\a\\b\\c"}, @@ -115,7 +118,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testWindowsPath() throws Exception { String path = "c:\\a\\b\\c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( '\\', '\\' ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), '\\', '\\', DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"c:", "c:\\a", "c:\\a\\b", "c:\\a\\b\\c"}, @@ -131,7 +134,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { NormalizeCharMap normMap = builder.build(); String path = "c:\\a\\b\\c"; 
Reader cs = new MappingCharFilter(normMap, new StringReader(path)); - PathHierarchyTokenizer t = new PathHierarchyTokenizer( ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(cs); assertTokenStreamContents(t, new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"}, @@ -143,7 +146,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testBasicSkip() throws Exception { String path = "/a/b/c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( 1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/b", "/b/c"}, @@ -155,7 +158,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testEndOfDelimiterSkip() throws Exception { String path = "/a/b/c/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( 1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/b", "/b/c", "/b/c/"}, @@ -167,7 +170,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharSkip() throws Exception { String path = "a/b/c"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( 1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/b", "/b/c"}, @@ -179,7 +182,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharEndOfDelimiterSkip() throws Exception { String path = "a/b/c/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer(1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/b", "/b/c", "/b/c/"}, @@ -191,7 +194,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimiterSkip() throws Exception { String path = "/"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( 1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{}, @@ -203,7 +206,7 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimitersSkip() throws Exception { String path = "//"; - PathHierarchyTokenizer t = new PathHierarchyTokenizer( 1 ); + PathHierarchyTokenizer t = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader( new StringReader(path)); assertTokenStreamContents(t, new String[]{"/"}, @@ -218,11 +221,12 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new PathHierarchyTokenizer(); + Tokenizer tokenizer = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); + 
// TODO: properly support positionLengthAttribute + checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false); } /** blast some random large strings through the analyzer */ @@ -231,10 +235,11 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new PathHierarchyTokenizer(); + Tokenizer tokenizer = new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027); + // TODO: properly support positionLengthAttribute + checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java index ea77da66087..3a41fcc7094 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java @@ -26,11 +26,14 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; +import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_DELIMITER; +import static org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer.DEFAULT_SKIP; + public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testBasicReverse() throws Exception { String path = "/a/b/c"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/a/b/c", "a/b/c", "b/c", "c"}, @@ -42,7 +45,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testEndOfDelimiterReverse() throws Exception { String path = "/a/b/c/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/a/b/c/", "a/b/c/", "b/c/", "c/"}, @@ -54,7 +57,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharReverse() throws Exception { String path = "a/b/c"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"a/b/c", "b/c", "c"}, @@ -66,7 +69,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharEndOfDelimiterReverse() throws Exception { String path = "a/b/c/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, 
DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"a/b/c/", "b/c/", "c/"}, @@ -78,7 +81,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimiterReverse() throws Exception { String path = "/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/"}, @@ -90,7 +93,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimitersReverse() throws Exception { String path = "//"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"//", "/"}, @@ -102,7 +105,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testEndOfDelimiterReverseSkip() throws Exception { String path = "/a/b/c/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 1 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); new StringReader(path); assertTokenStreamContents(t, @@ -115,7 +118,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharReverseSkip() throws Exception { String path = "a/b/c"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 1 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"a/b/", "b/"}, @@ -127,7 +130,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testStartOfCharEndOfDelimiterReverseSkip() throws Exception { String path = "a/b/c/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 1 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"a/b/", "b/"}, @@ -139,7 +142,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimiterReverseSkip() throws Exception { String path = "/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 1 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{}, @@ -151,7 +154,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testOnlyDelimitersReverseSkip() throws Exception { String path = "//"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 1 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 1); t.setReader(new StringReader(path)); assertTokenStreamContents(t, new String[]{"/"}, @@ -163,7 
+166,7 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { public void testReverseSkip2() throws Exception { String path = "/a/b/c/"; - ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( 2 ); + ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, 2); t.setReader( new StringReader(path)); assertTokenStreamContents(t, new String[]{"/a/", "a/"}, @@ -178,11 +181,12 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new ReversePathHierarchyTokenizer(); + Tokenizer tokenizer = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); + // TODO: properly support positionLengthAttribute + checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false); } /** blast some random large strings through the analyzer */ @@ -191,10 +195,11 @@ public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new ReversePathHierarchyTokenizer(); + Tokenizer tokenizer = new ReversePathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027); + // TODO: properly support positionLengthAttribute + checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java index ba1d4a5351d..892527b146e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java @@ -53,7 +53,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase }; for( String[] test : tests ) { - TokenStream stream = new PatternTokenizer(Pattern.compile(test[1]), Integer.parseInt(test[0])); + TokenStream stream = new PatternTokenizer(newAttributeFactory(), Pattern.compile(test[1]), Integer.parseInt(test[0])); ((Tokenizer)stream).setReader(new StringReader(test[2])); String out = tsToString( stream ); // System.out.println( test[2] + " ==> " + out ); @@ -86,7 +86,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) ); // create PatternTokenizer - Tokenizer stream = new PatternTokenizer(Pattern.compile("[,;/\\s]+"), -1); + Tokenizer stream = new PatternTokenizer(newAttributeFactory(), Pattern.compile("[,;/\\s]+"), -1); stream.setReader(charStream); assertTokenStreamContents(stream, new String[] { "Günther", "Günther", "is", "here" }, @@ -95,7 +95,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase INPUT.length()); charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) ); - stream = new PatternTokenizer(Pattern.compile("Günther"), 0); + stream = new PatternTokenizer(newAttributeFactory(), 
Pattern.compile("Günther"), 0); stream.setReader(charStream); assertTokenStreamContents(stream, new String[] { "Günther", "Günther" }, @@ -132,7 +132,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new PatternTokenizer(Pattern.compile("a"), -1); + Tokenizer tokenizer = new PatternTokenizer(newAttributeFactory(), Pattern.compile("a"), -1); return new TokenStreamComponents(tokenizer); } }; @@ -141,7 +141,7 @@ public class TestPatternTokenizer extends BaseTokenStreamTestCase Analyzer b = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new PatternTokenizer(Pattern.compile("a"), 0); + Tokenizer tokenizer = new PatternTokenizer(newAttributeFactory(), Pattern.compile("a"), 0); return new TokenStreamComponents(tokenizer); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java index cb93ccf3b0b..29e02430bbb 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizerFactory.java @@ -28,7 +28,7 @@ public class TestPatternTokenizerFactory extends BaseTokenStreamFactoryTestCase public void testFactory() throws Exception { final Reader reader = new StringReader("Günther Günther is here"); // create PatternTokenizer - Tokenizer stream = tokenizerFactory("Pattern", "pattern", "[,;/\\s]+").create(); + Tokenizer stream = tokenizerFactory("Pattern", "pattern", "[,;/\\s]+").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "Günther", "Günther", "is", "here" }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java index c3177e2f149..02a20de016c 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java @@ -18,18 +18,14 @@ package org.apache.lucene.analysis.pt; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -41,9 +37,8 @@ public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT); - TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, 
source); - return new TokenStreamComponents(source, new PortugueseLightStemFilter(result)); + Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(source, new PortugueseLightStemFilter(source)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java index 66f093612b3..585993380ea 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java @@ -18,18 +18,14 @@ package org.apache.lucene.analysis.pt; */ import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -41,9 +37,8 @@ public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT); - TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); - return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(result)); + Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(source)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java index bcbaea97ea4..39a6c685bbc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.pt; import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary; import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -28,9 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; /** @@ -40,9 +37,8 @@ public class TestPortugueseStemFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new 
StandardTokenizer(TEST_VERSION_CURRENT); - TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source); - return new TokenStreamComponents(source, new PortugueseStemFilter(result)); + Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(source, new PortugueseStemFilter(source)); } }; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java index 27398d9a98b..d407277dd27 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java @@ -1096,7 +1096,8 @@ public class ShingleFilterTest extends BaseTokenStreamTestCase { private static Token createToken (String term, int start, int offset, int positionIncrement) { - Token token = new Token(start, offset); + Token token = new Token(); + token.setOffset(start, offset); token.copyBuffer(term.toCharArray(), 0, term.length()); token.setPositionIncrement(positionIncrement); return token; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java index 57ff1173fe7..873734d4f10 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java @@ -151,7 +151,9 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { dogDetector.addAttribute(CheckClearAttributesAttribute.class); theDetector.addAttribute(CheckClearAttributesAttribute.class); - final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer2.toString())); + MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false); + tokenizer.setReader(new StringReader(buffer2.toString())); + final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer); tee2.addSinkTokenStream(dogDetector); tee2.addSinkTokenStream(theDetector); final TokenStream source2 = tee2; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java index be5bae6ee21..89bc451ec3d 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java @@ -34,7 +34,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testStandardTokenizer() throws Exception { Reader reader = new StringReader("Wha\u0301t's this thing do?"); - Tokenizer stream = tokenizerFactory("Standard").create(); + Tokenizer stream = tokenizerFactory("Standard").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[]{"Wha\u0301t's", "this", "thing", "do"}); @@ -49,7 +49,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { String content = "one two three " + longWord + " four five six"; Reader reader = new StringReader(content); Tokenizer stream = tokenizerFactory("Standard", - "maxTokenLength", "1000").create(); + "maxTokenLength", "1000").create(newAttributeFactory()); 
stream.setReader(reader); assertTokenStreamContents(stream, new String[]{"one", "two", "three", longWord, "four", "five", "six"}); @@ -60,7 +60,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testClassicTokenizer() throws Exception { Reader reader = new StringReader("What's this thing do?"); - Tokenizer stream = tokenizerFactory("Classic").create(); + Tokenizer stream = tokenizerFactory("Classic").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[]{"What's", "this", "thing", "do"}); @@ -75,7 +75,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { String content = "one two three " + longWord + " four five six"; Reader reader = new StringReader(content); Tokenizer stream = tokenizerFactory("Classic", - "maxTokenLength", "1000").create(); + "maxTokenLength", "1000").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[]{"one", "two", "three", longWord, "four", "five", "six"}); @@ -86,7 +86,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testStandardFilter() throws Exception { Reader reader = new StringReader("What's this thing do?"); - Tokenizer tokenizer = tokenizerFactory("Classic").create(); + Tokenizer tokenizer = tokenizerFactory("Classic").create(newAttributeFactory()); tokenizer.setReader(reader); TokenStream stream = tokenFilterFactory("Classic").create(tokenizer); assertTokenStreamContents(stream, @@ -109,7 +109,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testWhitespaceTokenizer() throws Exception { Reader reader = new StringReader("What's this thing do?"); - Tokenizer stream = tokenizerFactory("Whitespace").create(); + Tokenizer stream = tokenizerFactory("Whitespace").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "What's", "this", "thing", "do?" 
}); @@ -120,7 +120,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testLetterTokenizer() throws Exception { Reader reader = new StringReader("What's this thing do?"); - Tokenizer stream = tokenizerFactory("Letter").create(); + Tokenizer stream = tokenizerFactory("Letter").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "What", "s", "this", "thing", "do" }); @@ -131,7 +131,7 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase { */ public void testLowerCaseTokenizer() throws Exception { Reader reader = new StringReader("What's this thing do?"); - Tokenizer stream = tokenizerFactory("LowerCase").create(); + Tokenizer stream = tokenizerFactory("LowerCase").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "what", "s", "this", "thing", "do" }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java index e8eda12a15e..d8ce2d5bbcc 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java @@ -31,7 +31,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes public void testUAX29URLEmailTokenizer() throws Exception { Reader reader = new StringReader("Wha\u0301t's this thing do?"); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "Wha\u0301t's", "this", "thing", "do" }); @@ -39,7 +39,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes public void testArabic() throws Exception { Reader reader = new StringReader("الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008."); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", @@ -48,7 +48,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes public void testChinese() throws Exception { Reader reader = new StringReader("我是中国人。 1234 Tests "); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "我", "是", "中", "国", "人", "1234", "Tests" }); @@ -56,7 +56,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes public void testKorean() throws Exception { Reader reader = new StringReader("안녕하세요 한글입니다"); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "안녕하세요", "한글입니다" }); @@ -64,7 +64,7 @@ public class TestUAX29URLEmailTokenizerFactory 
extends BaseTokenStreamFactoryTes public void testHyphen() throws Exception { Reader reader = new StringReader("some-dashed-phrase"); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "some", "dashed", "phrase" }); @@ -87,7 +87,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes + " blah Sirrah woof " + "http://[a42:a7b6::]/qSmxSUU4z/%52qVl4\n"; Reader reader = new StringReader(textWithURLs); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { @@ -126,7 +126,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes + "lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae\n" + "lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H\n"; Reader reader = new StringReader(textWithEmails); - Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(); + Tokenizer stream = tokenizerFactory("UAX29URLEmail").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { @@ -157,7 +157,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes String content = "one two three " + longWord + " four five six"; Reader reader = new StringReader(content); Tokenizer stream = tokenizerFactory("UAX29URLEmail", - "maxTokenLength", "1000").create(); + "maxTokenLength", "1000").create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] {"one", "two", "three", longWord, "four", "five", "six" }); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiTokenizerFactory.java index 07ceb9f8ded..49949820427 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiTokenizerFactory.java @@ -31,7 +31,7 @@ public class TestThaiTokenizerFactory extends BaseTokenStreamFactoryTestCase { */ public void testWordBreak() throws Exception { assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiTokenizer.DBBI_AVAILABLE); - Tokenizer tokenizer = tokenizerFactory("Thai").create(); + Tokenizer tokenizer = tokenizerFactory("Thai").create(newAttributeFactory()); tokenizer.setReader(new StringReader("การที่ได้ต้องแสดงว่างานดี")); assertTokenStreamContents(tokenizer, new String[] {"การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"}); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java index 84a181e4ffc..a470c9fefaa 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java @@ -52,7 +52,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { } // internal buffer size is 1024 make sure we have a surrogate pair right at the border builder.insert(1023, "\ud801\udc1c"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new 
LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString())); assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" ")); } @@ -70,7 +70,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { builder.append("a"); } builder.append("\ud801\udc1cabc"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)}); } @@ -85,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { for (int i = 0; i < 255; i++) { builder.append("A"); } - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } @@ -100,7 +100,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { builder.append("A"); } builder.append("\ud801\udc1c"); - Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } @@ -110,7 +110,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) { @Override protected int normalize(int c) { if (c > 0xffff) { @@ -148,7 +148,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT) { + Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()) { @Override protected int normalize(int c) { if (c <= 0xffff) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java index 1d19d21097e..788eb373405 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java @@ -40,7 +40,7 @@ public class TestElision extends BaseTokenStreamTestCase { public void testElision() throws Exception { String test = "Plop, juste pour voir l'embrouille avec O'brian. 
M'enfin."; - Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT); + Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory()); tokenizer.setReader(new StringReader(test)); CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false); TokenFilter filter = new ElisionFilter(tokenizer, articles); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestSegmentingTokenizerBase.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestSegmentingTokenizerBase.java index 2b87129a6b5..699f4eb4205 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestSegmentingTokenizerBase.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestSegmentingTokenizerBase.java @@ -140,7 +140,7 @@ public class TestSegmentingTokenizerBase extends BaseTokenStreamTestCase { private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public WholeSentenceTokenizer() { - super(BreakIterator.getSentenceInstance(Locale.ROOT)); + super(newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT)); } @Override @@ -178,7 +178,7 @@ public class TestSegmentingTokenizerBase extends BaseTokenStreamTestCase { private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); public SentenceAndWordTokenizer() { - super(BreakIterator.getSentenceInstance(Locale.ROOT)); + super(newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT)); } @Override diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/TestWikipediaTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/TestWikipediaTokenizerFactory.java index 099e4460ab9..6632bf95cdd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/TestWikipediaTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/TestWikipediaTokenizerFactory.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer; public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase { public void testTokenizer() throws Exception { Reader reader = new StringReader("This is a [[Category:foo]]"); - Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(); + Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(newAttributeFactory()); tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { "This", "is", "a", "foo" }, diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java index 5459556a5d7..40ec291ab8a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.wikipedia; import java.io.StringReader; import java.io.IOException; +import java.util.Collections; import java.util.Random; import java.util.Set; import java.util.HashSet; @@ -39,7 +40,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { public void testSimple() throws Exception { String text = "This is a [[Category:foo]]"; - WikipediaTokenizer tf = new WikipediaTokenizer(); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), 
WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); tf.setReader(new StringReader(text)); assertTokenStreamContents(tf, new String[] { "This", "is", "a", "foo" }, @@ -62,7 +63,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] Citation martian code"; - WikipediaTokenizer tf = new WikipediaTokenizer(); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); tf.setReader(new StringReader(test)); assertTokenStreamContents(tf, new String[] {"link", "This", "is", "a", @@ -104,7 +105,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { } public void testLinkPhrases() throws Exception { - WikipediaTokenizer tf = new WikipediaTokenizer(); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); tf.setReader(new StringReader(LINK_PHRASES)); checkLinkPhrases(tf); } @@ -118,7 +119,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { public void testLinks() throws Exception { String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]"; - WikipediaTokenizer tf = new WikipediaTokenizer(); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); tf.setReader(new StringReader(test)); assertTokenStreamContents(tf, new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here", @@ -134,7 +135,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { untoks.add(WikipediaTokenizer.CATEGORY); untoks.add(WikipediaTokenizer.ITALICS); //should be exactly the same, regardless of untoks - WikipediaTokenizer tf = new WikipediaTokenizer(WikipediaTokenizer.TOKENS_ONLY, untoks); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, untoks); tf.setReader(new StringReader(LINK_PHRASES)); checkLinkPhrases(tf); String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; @@ -155,7 +156,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { untoks.add(WikipediaTokenizer.ITALICS); String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; //should output all the indivual tokens plus the untokenized tokens as well. Untokenized tokens - WikipediaTokenizer tf = new WikipediaTokenizer(WikipediaTokenizer.BOTH, untoks); + WikipediaTokenizer tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.BOTH, untoks); tf.setReader(new StringReader(test)); assertTokenStreamContents(tf, new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g", @@ -167,7 +168,7 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { ); // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase? 
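(Illustration, not part of the patch: the WikipediaTokenizer hunks in this file all move from the no-arg constructor to an explicit AttributeFactory, output mode and untokenized-type set. A hedged sketch of that pattern inside a test Analyzer, using only names that appear in the hunks above.)

    import java.util.Collections;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // Explicit AttributeFactory, token output mode and untokenized types
        // replace the removed WikipediaTokenizer() no-arg constructor.
        Tokenizer tokenizer = new WikipediaTokenizer(newAttributeFactory(),
            WikipediaTokenizer.TOKENS_ONLY, Collections.<String>emptySet());
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };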
- tf = new WikipediaTokenizer(WikipediaTokenizer.BOTH, untoks); + tf = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.BOTH, untoks); tf.setReader(new StringReader(test)); int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 }; @@ -187,11 +188,12 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WikipediaTokenizer(); + Tokenizer tokenizer = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER); + // TODO: properly support positionLengthAttribute + checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false); } /** blast some random large strings through the analyzer */ @@ -201,10 +203,11 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new WikipediaTokenizer(); + Tokenizer tokenizer = new WikipediaTokenizer(newAttributeFactory(), WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); return new TokenStreamComponents(tokenizer, tokenizer); } }; - checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 8192); + // TODO: properly support positionLengthAttribute + checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 8192, false, false); } } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java index 54d5267dbf7..3e8bfa32824 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java @@ -77,7 +77,7 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)); - Tokenizer tokenStream = new NGramTokenizer(TEST_VERSION_CURRENT, 1, 1); + Tokenizer tokenStream = new NGramTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), 1, 1); tokenStream.setReader(reader); assertTokenStreamContents(tokenStream, diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index fdbed4fdb51..abcf09728d8 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -42,7 +42,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { sb.append(whitespace); sb.append("testing 1234"); String input = sb.toString(); - ICUTokenizer tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false)); + ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); tokenizer.setReader(new StringReader(input)); assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); } @@ -53,7 +53,7 @@ public class TestICUTokenizer extends 
BaseTokenStreamTestCase { sb.append('a'); } String input = sb.toString(); - ICUTokenizer tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false)); + ICUTokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); tokenizer.setReader(new StringReader(input)); char token[] = new char[4096]; Arrays.fill(token, 'a'); @@ -70,7 +70,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { private Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false)); + Tokenizer tokenizer = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); TokenFilter filter = new ICUNormalizer2Filter(tokenizer); return new TokenStreamComponents(tokenizer, filter); } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java index 5dcc87e6b08..ee806334176 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java @@ -30,7 +30,7 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - return new TokenStreamComponents(new ICUTokenizer()); + return new TokenStreamComponents(new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(true))); } }; diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java index 033da1c1dae..dacd3734891 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerFactory.java @@ -33,7 +33,7 @@ public class TestICUTokenizerFactory extends BaseTokenStreamTestCase { Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี This is a test ກວ່າດອກ"); ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap()); factory.inform(new ClasspathResourceLoader(getClass())); - Tokenizer stream = factory.create(); + Tokenizer stream = factory.create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", @@ -48,7 +48,7 @@ public class TestICUTokenizerFactory extends BaseTokenStreamTestCase { args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi"); ICUTokenizerFactory factory = new ICUTokenizerFactory(args); factory.inform(new ClasspathResourceLoader(this.getClass())); - Tokenizer stream = factory.create(); + Tokenizer stream = factory.create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "Don't,break.at?/(punct)!", "\u201Cnice\u201D", "85_At:all;", "`really\"", "+2=3$5,&813", "!@#%$^)(*@#$" }, @@ -62,7 +62,7 @@ public class TestICUTokenizerFactory extends BaseTokenStreamTestCase { args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi"); ICUTokenizerFactory factory = new ICUTokenizerFactory(args); factory.inform(new 
ClasspathResourceLoader(getClass())); - Tokenizer stream = factory.create(); + Tokenizer stream = factory.create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "One-two", "punch", @@ -82,7 +82,7 @@ public class TestICUTokenizerFactory extends BaseTokenStreamTestCase { args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi"); ICUTokenizerFactory factory = new ICUTokenizerFactory(args); factory.inform(new ClasspathResourceLoader(getClass())); - Tokenizer stream = factory.create(); + Tokenizer stream = factory.create(newAttributeFactory()); stream.setReader(reader); assertTokenStreamContents(stream, new String[] { "Some", "English", "Немного русский. ", diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java index eeec413d70d..55dd9466fe7 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java @@ -41,7 +41,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new ICUTokenizer(new DefaultICUTokenizerConfig(false)); + Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); TokenStream result = new CJKBigramFilter(source); return new TokenStreamComponents(source, new StopFilter(TEST_VERSION_CURRENT, result, CharArraySet.EMPTY_SET)); } @@ -56,7 +56,7 @@ public class TestWithCJKBigramFilter extends BaseTokenStreamTestCase { private Analyzer analyzer2 = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new ICUTokenizer(new DefaultICUTokenizerConfig(false)); + Tokenizer source = new ICUTokenizer(newAttributeFactory(), new DefaultICUTokenizerConfig(false)); // we put this before the CJKBigramFilter, because the normalization might combine // some halfwidth katakana forms, which will affect the bigramming. 
TokenStream result = new ICUNormalizer2Filter(source); diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java index 7a1570074e4..ea011ba657f 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java @@ -36,7 +36,7 @@ public class TestExtendedMode extends BaseTokenStreamTestCase { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.EXTENDED); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, Mode.EXTENDED); return new TokenStreamComponents(tokenizer, tokenizer); } }; diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java index 7b1d13630c1..4c4345b5c56 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java @@ -32,7 +32,7 @@ public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase { private Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.DEFAULT_MODE); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.DEFAULT_MODE); return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer)); } }; @@ -48,7 +48,7 @@ public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new JapaneseTokenizer(null, true, JapaneseTokenizer.DEFAULT_MODE); + Tokenizer source = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.DEFAULT_MODE); TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink)); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java index 985d09404cc..8c996ba6ebd 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java @@ -32,7 +32,7 @@ public class TestJapaneseBaseFormFilterFactory extends BaseTokenStreamTestCase { public void testBasics() throws IOException { JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap()); tokenizerFactory.inform(new StringMockResourceLoader("")); - TokenStream ts = tokenizerFactory.create(); + TokenStream ts = tokenizerFactory.create(newAttributeFactory()); ((Tokenizer)ts).setReader(new StringReader("それはまだ実験段階にあります")); JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory(new HashMap()); ts = factory.create(ts); diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java 
b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java index 57461aeacf6..9cfc1820d1c 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilter.java @@ -45,7 +45,7 @@ public class TestJapaneseIterationMarkCharFilter extends BaseTokenStreamTestCase private Analyzer japaneseAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, false, JapaneseTokenizer.Mode.SEARCH); return new TokenStreamComponents(tokenizer, tokenizer); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java index 9008f86fe64..00d29f556ff 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java @@ -50,7 +50,7 @@ public class TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamT CharFilter filter = filterFactory.create( new StringReader("時々馬鹿々々しいところゞゝゝミスヾ") ); - TokenStream tokenStream = tokenizerFactory.create(); + TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory()); ((Tokenizer)tokenStream).setReader(filter); assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"}); } @@ -67,7 +67,7 @@ public class TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamT CharFilter filter = filterFactory.create( new StringReader("時々馬鹿々々しいところゞゝゝミスヾ") ); - TokenStream tokenStream = tokenizerFactory.create(); + TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory()); ((Tokenizer)tokenStream).setReader(filter); assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"}); } @@ -84,7 +84,7 @@ public class TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamT CharFilter filter = filterFactory.create( new StringReader("時々馬鹿々々しいところゞゝゝミスヾ") ); - TokenStream tokenStream = tokenizerFactory.create(); + TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory()); ((Tokenizer)tokenStream).setReader(filter); assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"}); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java index 716b5437494..a599271919c 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java @@ -32,7 +32,7 @@ public class TestJapaneseKatakanaStemFilterFactory extends BaseTokenStreamTestCa public void testKatakanaStemming() throws IOException { JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap()); tokenizerFactory.inform(new StringMockResourceLoader("")); - 
TokenStream tokenStream = tokenizerFactory.create(); + TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory()); ((Tokenizer)tokenStream).setReader(new StringReader("明後日パーティーに行く予定がある。図書館で資料をコピーしました。")); JapaneseKatakanaStemFilterFactory filterFactory = new JapaneseKatakanaStemFilterFactory(new HashMap());; assertTokenStreamContents(filterFactory.create(tokenStream), diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java index eae9ae917ec..5252b919279 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java @@ -35,7 +35,7 @@ public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase { private Analyzer katakanaAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.Mode.SEARCH); return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, false)); } }; @@ -43,7 +43,7 @@ public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase { private Analyzer romajiAnalyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.Mode.SEARCH); return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, true)); } }; @@ -59,7 +59,7 @@ public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.Mode.SEARCH); TokenStream stream = new CJKWidthFilter(tokenizer); return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, false)); } @@ -79,7 +79,7 @@ public class TestJapaneseReadingFormFilter extends BaseTokenStreamTestCase { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.Mode.SEARCH); TokenStream stream = new CJKWidthFilter(tokenizer); return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(stream, true)); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java index 1dc39615bde..2f09becd176 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java @@ -62,7 +62,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { private Analyzer analyzer = new 
Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.SEARCH); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -70,7 +70,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { private Analyzer analyzerNormal = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.NORMAL); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.NORMAL); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -78,7 +78,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { private Analyzer analyzerNoPunct = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(readDict(), true, Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), true, Mode.SEARCH); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -86,7 +86,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { private Analyzer extendedModeAnalyzerNoPunct = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(readDict(), true, Mode.EXTENDED); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), true, Mode.EXTENDED); return new TokenStreamComponents(tokenizer, tokenizer); } }; @@ -202,7 +202,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.SEARCH); TokenStream graph = new MockGraphTokenFilter(random(), tokenizer); return new TokenStreamComponents(tokenizer, graph); } @@ -352,7 +352,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - JapaneseTokenizer tokenizer = new JapaneseTokenizer(readDict(), false, Mode.SEARCH); + JapaneseTokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.SEARCH); tokenizer.setGraphvizFormatter(gv2); return new TokenStreamComponents(tokenizer, tokenizer); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java index 48bca5564ad..a409a4a3124 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java @@ -34,7 +34,7 @@ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { public void testSimple() throws IOException { JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap()); factory.inform(new StringMockResourceLoader("")); - TokenStream ts = factory.create(); + TokenStream ts = factory.create(newAttributeFactory()); 
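(Illustration, not part of the patch: the kuromoji hunks above consistently prepend an AttributeFactory argument to the JapaneseTokenizer constructor. A short sketch of the resulting test analyzer; readDict() and Mode are the test helpers and enum already used in these hunks.)

    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // AttributeFactory is now the first argument, followed by the user
        // dictionary, the discardPunctuation flag and the tokenization mode.
        Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.SEARCH);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
    };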
((Tokenizer)ts).setReader(new StringReader("これは本ではない")); assertTokenStreamContents(ts, new String[] { "これ", "は", "本", "で", "は", "ない" }, @@ -49,7 +49,7 @@ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { public void testDefaults() throws IOException { JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(new HashMap()); factory.inform(new StringMockResourceLoader("")); - TokenStream ts = factory.create(); + TokenStream ts = factory.create(newAttributeFactory()); ((Tokenizer)ts).setReader(new StringReader("シニアソフトウェアエンジニア")); assertTokenStreamContents(ts, new String[] { "シニア", "シニアソフトウェアエンジニア", "ソフトウェア", "エンジニア" } @@ -64,7 +64,7 @@ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { args.put("mode", "normal"); JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args); factory.inform(new StringMockResourceLoader("")); - TokenStream ts = factory.create(); + TokenStream ts = factory.create(newAttributeFactory()); ((Tokenizer)ts).setReader(new StringReader("シニアソフトウェアエンジニア")); assertTokenStreamContents(ts, new String[] { "シニアソフトウェアエンジニア" } @@ -85,7 +85,7 @@ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { args.put("userDictionary", "userdict.txt"); JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args); factory.inform(new StringMockResourceLoader(userDict)); - TokenStream ts = factory.create(); + TokenStream ts = factory.create(newAttributeFactory()); ((Tokenizer)ts).setReader(new StringReader("関西国際空港に行った")); assertTokenStreamContents(ts, new String[] { "関西", "国際", "空港", "に", "行っ", "た" } @@ -100,7 +100,7 @@ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { args.put("discardPunctuation", "false"); JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory(args); factory.inform(new StringMockResourceLoader("")); - TokenStream ts = factory.create(); + TokenStream ts = factory.create(newAttributeFactory()); ((Tokenizer)ts).setReader(new StringReader("今ノルウェーにいますが、来週の頭日本に戻ります。楽しみにしています!お寿司が食べたいな。。。")); assertTokenStreamContents(ts, new String[] { "今", "ノルウェー", "に", "い", "ます", "が", "、", diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java index fa92e94a870..d100acd5a11 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java @@ -34,7 +34,7 @@ public class TestSearchMode extends BaseTokenStreamTestCase { private final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), null, true, Mode.SEARCH); return new TokenStreamComponents(tokenizer, tokenizer); } }; diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java index 9069fc6d59b..169c3c69424 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis.phonetic; import java.io.IOException; 
-import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -25,54 +24,47 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.util.TestUtil; public class DoubleMetaphoneFilterTest extends BaseTokenStreamTestCase { - - private TokenStream whitespaceTokenizer(String data) throws IOException { - WhitespaceTokenizer whitespaceTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT); - whitespaceTokenizer.setReader(new StringReader(data)); - return whitespaceTokenizer; - } public void testSize4FalseInject() throws Exception { - TokenStream stream = whitespaceTokenizer("international"); + TokenStream stream = whitespaceMockTokenizer("international"); TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false); assertTokenStreamContents(filter, new String[] { "ANTR" }); } public void testSize4TrueInject() throws Exception { - TokenStream stream = whitespaceTokenizer("international"); + TokenStream stream = whitespaceMockTokenizer("international"); TokenStream filter = new DoubleMetaphoneFilter(stream, 4, true); assertTokenStreamContents(filter, new String[] { "international", "ANTR" }); } public void testAlternateInjectFalse() throws Exception { - TokenStream stream = whitespaceTokenizer("Kuczewski"); + TokenStream stream = whitespaceMockTokenizer("Kuczewski"); TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false); assertTokenStreamContents(filter, new String[] { "KSSK", "KXFS" }); } public void testSize8FalseInject() throws Exception { - TokenStream stream = whitespaceTokenizer("international"); + TokenStream stream = whitespaceMockTokenizer("international"); TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false); assertTokenStreamContents(filter, new String[] { "ANTRNXNL" }); } public void testNonConvertableStringsWithInject() throws Exception { - TokenStream stream = whitespaceTokenizer("12345 #$%@#^%&"); + TokenStream stream = whitespaceMockTokenizer("12345 #$%@#^%&"); TokenStream filter = new DoubleMetaphoneFilter(stream, 8, true); assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" }); } public void testNonConvertableStringsWithoutInject() throws Exception { - TokenStream stream = whitespaceTokenizer("12345 #$%@#^%&"); + TokenStream stream = whitespaceMockTokenizer("12345 #$%@#^%&"); TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false); assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" }); // should have something after the stream - stream = whitespaceTokenizer("12345 #$%@#^%& hello"); + stream = whitespaceMockTokenizer("12345 #$%@#^%& hello"); filter = new DoubleMetaphoneFilter(stream, 8, false); assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" }); } diff --git a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java index 4b6abfff108..bbcf0eda5cf 100644 --- a/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java +++ b/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java @@ -110,7 +110,7 @@ public class TestBeiderMorseFilter extends BaseTokenStreamTestCase { } 
public void testCustomAttribute() throws IOException { - TokenStream stream = new KeywordTokenizer(); + TokenStream stream = new MockTokenizer(MockTokenizer.KEYWORD, false); ((Tokenizer)stream).setReader(new StringReader("D'Angelo")); stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*")); stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)); diff --git a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestHMMChineseTokenizerFactory.java b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestHMMChineseTokenizerFactory.java index e1557f3f969..619f8689d33 100644 --- a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestHMMChineseTokenizerFactory.java +++ b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestHMMChineseTokenizerFactory.java @@ -34,7 +34,7 @@ public class TestHMMChineseTokenizerFactory extends BaseTokenStreamTestCase { public void testSimple() throws Exception { Reader reader = new StringReader("我购买了道具和服装。"); TokenizerFactory factory = new HMMChineseTokenizerFactory(new HashMap()); - Tokenizer tokenizer = factory.create(); + Tokenizer tokenizer = factory.create(newAttributeFactory()); tokenizer.setReader(reader); // TODO: fix smart chinese to not emit punctuation tokens // at the moment: you have to clean up with WDF, or use the stoplist, etc diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestToken.java b/lucene/core/src/test/org/apache/lucene/analysis/TestToken.java index 9008f19d591..60862e2c64d 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestToken.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestToken.java @@ -40,16 +40,9 @@ public class TestToken extends LuceneTestCase { assertEquals("word", t.type()); assertEquals(0, t.getFlags()); - t = new Token(6, 22); - t.copyBuffer(content, 0, content.length); - assertEquals("hello", t.toString()); - assertEquals("hello", t.toString()); - assertEquals(6, t.startOffset()); - assertEquals(22, t.endOffset()); - assertEquals("word", t.type()); - assertEquals(0, t.getFlags()); - - t = new Token(6, 22, 7); + t = new Token(); + t.setOffset(6, 22); + t.setFlags(7); t.copyBuffer(content, 0, content.length); assertEquals("hello", t.toString()); assertEquals("hello", t.toString()); @@ -58,7 +51,9 @@ public class TestToken extends LuceneTestCase { assertEquals("word", t.type()); assertEquals(7, t.getFlags()); - t = new Token(6, 22, "junk"); + t = new Token(); + t.setOffset(6, 22); + t.setType("junk"); t.copyBuffer(content, 0, content.length); assertEquals("hello", t.toString()); assertEquals("hello", t.toString()); @@ -174,7 +169,8 @@ public class TestToken extends LuceneTestCase { } public void testClone() throws Exception { - Token t = new Token(0, 5); + Token t = new Token(); + t.setOffset(0, 5); char[] content = "hello".toCharArray(); t.copyBuffer(content, 0, 5); char[] buf = t.buffer(); @@ -195,7 +191,8 @@ public class TestToken extends LuceneTestCase { assertEquals("", t.toString()); assertEquals("", copy.toString()); - t = new Token(0, 5); + t = new Token(); + t.setOffset(0, 5); char[] content = "hello".toCharArray(); t.copyBuffer(content, 0, 5); char[] buf = t.buffer(); @@ -245,7 +242,8 @@ public class TestToken extends LuceneTestCase { } public void testAttributeReflection() throws Exception { - Token t = new Token("foobar", 6, 22, 8); + Token t = new Token("foobar", 6, 22); + t.setFlags(8); TestUtil.assertAttributeReflection(t, new HashMap() {{ 
put(CharTermAttribute.class.getName() + "#term", "foobar"); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java index 1b3fdc3796c..19822871eea 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java @@ -591,7 +591,6 @@ public class TestPayloads extends LuceneTestCase { Field field = new TextField("field", "", Field.Store.NO); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("here we go")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); doc.add(field); writer.addDocument(doc); @@ -603,7 +602,6 @@ public class TestPayloads extends LuceneTestCase { writer.addDocument(doc); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("another")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); writer.addDocument(doc); DirectoryReader reader = writer.getReader(); @@ -625,7 +623,6 @@ public class TestPayloads extends LuceneTestCase { Field field = new TextField("field", "", Field.Store.NO); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("here we go")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); doc.add(field); Field field2 = new TextField("field", "", Field.Store.NO); @@ -638,8 +635,6 @@ public class TestPayloads extends LuceneTestCase { Field field3 = new TextField("field", "", Field.Store.NO); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("nopayload")); - - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field3.setTokenStream(ts); doc.add(field3); writer.addDocument(doc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java index 5d3d760d6f2..3202788ba5e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java @@ -51,7 +51,6 @@ public class TestPayloadsOnVectors extends LuceneTestCase { Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("here we go")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); doc.add(field); writer.addDocument(doc); @@ -65,7 +64,6 @@ public class TestPayloadsOnVectors extends LuceneTestCase { ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("another")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); writer.addDocument(doc); @@ -96,7 +94,6 @@ public class TestPayloadsOnVectors extends LuceneTestCase { Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new StringReader("here we go")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); doc.add(field); Field field2 = new Field("field", "", customType); @@ -109,7 +106,6 @@ public class TestPayloadsOnVectors extends LuceneTestCase { Field field3 = new Field("field", "", customType); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer)ts).setReader(new 
StringReader("nopayload")); - assertFalse(ts.hasAttribute(PayloadAttribute.class)); field3.setTokenStream(ts); doc.add(field3); writer.addDocument(doc); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java index 484671ca692..fd0fa414f68 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java @@ -67,7 +67,8 @@ public class TokenGroup { tot += score; } } - Token token = new Token(termStartOffset, termEndOffset); + Token token = new Token(); + token.setOffset(termStartOffset, termEndOffset); token.setEmpty().append(termAtt); tokens[numTokens] = token; scores[numTokens] = score; diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java index e6f21ea9ad5..96edb8aa5df 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java @@ -314,10 +314,10 @@ public class HighlighterPhraseTest extends LuceneTestCase { public void reset() { this.i = -1; this.tokens = new Token[] { - new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3), - new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7), - new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11), - new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) }; + new Token("the", 0, 3), + new Token("fox", 4, 7), + new Token("did", 8, 11), + new Token("jump", 16, 20) }; this.tokens[3].setPositionIncrement(2); } } @@ -354,10 +354,10 @@ public class HighlighterPhraseTest extends LuceneTestCase { public void reset() { this.i = -1; this.tokens = new Token[] { - new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3), - new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7), - new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 8, 14), - new Token(new char[] { 'j', 'u', 'm', 'p', 'e', 'd' }, 0, 6, 8, 14) }; + new Token("the", 0, 3), + new Token("fox", 4, 7), + new Token("jump", 8, 14), + new Token("jumped", 8, 14) }; this.tokens[3].setPositionIncrement(0); } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 03984ef6882..30b86f6267d 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -2013,7 +2013,8 @@ final class SynonymTokenizer extends TokenStream { } st = new StringTokenizer(expansions, ","); if (st.hasMoreTokens()) { - currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); + currentRealToken = new Token(); + currentRealToken.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length()); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java index 3207ea571d7..a725ce8828c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java +++ 
b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java @@ -78,12 +78,12 @@ public class TokenSourcesTest extends LuceneTestCase { public void reset() { this.i = -1; this.tokens = new Token[] { - new Token(new char[] {'t', 'h', 'e'}, 0, 3, 0, 3), - new Token(new char[] {'{', 'f', 'o', 'x', '}'}, 0, 5, 0, 7), - new Token(new char[] {'f', 'o', 'x'}, 0, 3, 4, 7), - new Token(new char[] {'d', 'i', 'd'}, 0, 3, 8, 11), - new Token(new char[] {'n', 'o', 't'}, 0, 3, 12, 15), - new Token(new char[] {'j', 'u', 'm', 'p'}, 0, 4, 16, 20)}; + new Token("the", 0, 3), + new Token("{fox}", 0, 7), + new Token("fox", 4, 7), + new Token("did", 8, 11), + new Token("not", 12, 15), + new Token("jump", 16, 20)}; this.tokens[1].setPositionIncrement(0); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index 2f51452dfe0..88bab92f550 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -39,6 +39,7 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; @@ -933,5 +934,18 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { mockTokenizer.setReader(new StringReader(input)); return mockTokenizer; } - + + /** Returns a new AttributeFactory impl */ + public static AttributeFactory newAttributeFactory(Random random) { + if (random.nextBoolean()) { + return Token.TOKEN_ATTRIBUTE_FACTORY; + } else { + return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; + } + } + + /** Returns a new AttributeFactory impl */ + public static AttributeFactory newAttributeFactory() { + return newAttributeFactory(random()); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java index 9ae5094fea3..dab64b0a8cc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java @@ -98,7 +98,7 @@ public class MockTokenizer extends Tokenizer { } public MockTokenizer(CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) { - this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, runAutomaton, lowerCase, maxTokenLength); + this(BaseTokenStreamTestCase.newAttributeFactory(), runAutomaton, lowerCase, maxTokenLength); } public MockTokenizer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
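(Illustration, not part of the patch: the BaseTokenStreamTestCase hunk above adds a newAttributeFactory() helper that randomly returns either Token.TOKEN_ATTRIBUTE_FACTORY or the default factory, and MockTokenizer's convenience constructor now uses it. A hedged sketch of a test consuming that helper directly; the input text and expected tokens are illustrative only.)

    // Randomly exercises both Token.TOKEN_ATTRIBUTE_FACTORY and the default factory.
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, newAttributeFactory());
    tokenizer.setReader(new StringReader("some test text"));
    assertTokenStreamContents(tokenizer, new String[] { "some", "test", "text" });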