FieldTermStack
is a stack that keeps query terms in the specified field
@@ -49,24 +35,24 @@ public class FieldTermStack {
private final String fieldName;
LinkedListjava org.apache.lucene.queryParser.QueryParser <input>
- */
- public static void main(String[] args) throws Exception {
- if (args.length == 0) {
- System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
- System.exit(0);
- }
- PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
- new org.apache.lucene.analysis.SimpleAnalyzer());
- Query q = qp.parse(args[0]);
- System.out.println(q.toString("field"));
- }
-
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [java org.apache.lucene.queryParser.QueryParser <input>
+ */
+// public static void main(String[] args) throws Exception {
+// if (args.length == 0) {
+// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+// System.exit(0);
+// }
+// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
+// new org.apache.lucene.analysis.SimpleAnalyzer());
+// Query q = qp.parse(args[0]);
+// System.out.println(q.toString("field"));
+// }
}
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
index 9794e13eb30..9cd21242042 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
@@ -606,16 +606,16 @@ public class PrecedenceQueryParser {
* Usage:java org.apache.lucene.queryParser.QueryParser <input>
*/
- public static void main(String[] args) throws Exception {
- if (args.length == 0) {
- System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
- System.exit(0);
- }
- PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
- new org.apache.lucene.analysis.SimpleAnalyzer());
- Query q = qp.parse(args[0]);
- System.out.println(q.toString("field"));
- }
+// public static void main(String[] args) throws Exception {
+// if (args.length == 0) {
+// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+// System.exit(0);
+// }
+// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
+// new org.apache.lucene.analysis.SimpleAnalyzer());
+// Query q = qp.parse(args[0]);
+// System.out.println(q.toString("field"));
+// }
}
PARSER_END(PrecedenceQueryParser)
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
index e53e3c12f2d..e98cc6f80a3 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
@@ -20,11 +20,9 @@ package org.apache.lucene.queryParser.standard;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
index 27b3dfa4294..addbca26476 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
@@ -32,7 +32,6 @@ import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@@ -341,8 +340,9 @@ public class TestQPHelper extends LocalizedTestCase {
"türm term term");
assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut");
- assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
- assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+ // FIXME: change MockAnalyzer to not extend CharTokenizer for this test
+ //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+ //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
index b08c306bf14..b3a28dbe1b0 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
@@ -30,7 +30,6 @@ import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@@ -333,8 +332,9 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
"türm term term");
assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut");
- assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
- assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+ //FIXME: Change MockAnalyzer to not extend CharTokenizer for this test
+ //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+ //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");
diff --git a/lucene/contrib/spellchecker/build.xml b/lucene/contrib/spellchecker/build.xml
index d89be94a268..3d92680dd33 100755
--- a/lucene/contrib/spellchecker/build.xml
+++ b/lucene/contrib/spellchecker/build.xml
@@ -24,4 +24,20 @@
java org.apache.lucene.queryParser.QueryParser <input>
- */
- public static void main(String[] args) throws Exception {
- if (args.length == 0) {
- System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
- System.exit(0);
- }
- QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
- new org.apache.lucene.analysis.SimpleAnalyzer());
- Query q = qp.parse(args[0]);
- System.out.println(q.toString("field"));
- }
-
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [java org.apache.lucene.queryParser.QueryParser <input>
+ */
+// public static void main(String[] args) throws Exception {
+// if (args.length == 0) {
+// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+// System.exit(0);
+// }
+// QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
+// new org.apache.lucene.analysis.SimpleAnalyzer());
+// Query q = qp.parse(args[0]);
+// System.out.println(q.toString("field"));
+// }
}
diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
index 1784114a4b8..fa4eed3cbc0 100644
--- a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
+++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
@@ -1111,16 +1111,16 @@ public class QueryParser {
* Usage:java org.apache.lucene.queryParser.QueryParser <input>
*/
- public static void main(String[] args) throws Exception {
- if (args.length == 0) {
- System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
- System.exit(0);
- }
- QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
- new org.apache.lucene.analysis.SimpleAnalyzer());
- Query q = qp.parse(args[0]);
- System.out.println(q.toString("field"));
- }
+// public static void main(String[] args) throws Exception {
+// if (args.length == 0) {
+// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
+// System.exit(0);
+// }
+// QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
+// new org.apache.lucene.analysis.SimpleAnalyzer());
+// Query q = qp.parse(args[0]);
+// System.out.println(q.toString("field"));
+// }
}
PARSER_END(QueryParser)
diff --git a/lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java b/lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java
index 0472b0038f4..539692021b8 100644
--- a/lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java
+++ b/lucene/src/test/org/apache/lucene/analysis/MockTokenizer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@@ -45,6 +46,13 @@ public class MockTokenizer extends CharTokenizer {
private final boolean lowerCase;
private int state;
+ public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+ super(Version.LUCENE_CURRENT, factory, input);
+ this.runAutomaton = runAutomaton;
+ this.lowerCase = lowerCase;
+ this.state = runAutomaton.getInitialState();
+ }
+
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
super(Version.LUCENE_CURRENT, input);
this.runAutomaton = runAutomaton;
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java b/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
index 26f1737dbeb..77c2883305a 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
+++ b/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
@@ -46,8 +46,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
}
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
- TEST_VERSION_CURRENT, new StringReader(builder.toString()));
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
}
@@ -64,8 +63,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("a");
}
builder.append("\ud801\udc1cabc");
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
- TEST_VERSION_CURRENT, new StringReader(builder.toString()));
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
}
}
@@ -79,8 +77,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
for (int i = 0; i < 255; i++) {
builder.append("A");
}
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
- TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
@@ -94,42 +91,10 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("A");
}
builder.append("\ud801\udc1c");
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
- TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
+ MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
- public void testLowerCaseTokenizer() throws IOException {
- StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT,
- reader);
- assertTokenStreamContents(tokenizer, new String[] { "tokenizer",
- "\ud801\udc44test" });
- }
-
- public void testLowerCaseTokenizerBWCompat() throws IOException {
- StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
- LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30,
- reader);
- assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
- }
-
- public void testWhitespaceTokenizer() throws IOException {
- StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
- reader);
- assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
- "\ud801\udc1ctest" });
- }
-
- public void testWhitespaceTokenizerBWCompat() throws IOException {
- StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30,
- reader);
- assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
- "\ud801\udc1ctest" });
- }
-
public void testIsTokenCharCharInSubclass() {
new TestingCharTokenizer(Version.LUCENE_30, new StringReader(""));
try {
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestToken.java b/lucene/src/test/org/apache/lucene/analysis/TestToken.java
index be5f6116497..552259d876b 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestToken.java
+++ b/lucene/src/test/org/apache/lucene/analysis/TestToken.java
@@ -239,7 +239,7 @@ public class TestToken extends LuceneTestCase {
}
public void testTokenAttributeFactory() throws Exception {
- TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"));
+ TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);
assertTrue("TypeAttribute is not implemented by SenselessAttributeImpl",
ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
index 01a1fa1f993..ad292b2ffdb 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
@@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
index 7d1ddfb8f6b..b9dfcd38ad0 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -42,7 +42,6 @@ import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@@ -538,67 +537,6 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
- /**
- * Make sure we skip wicked long terms.
- */
- public void testWickedLongTerm() throws IOException {
- RAMDirectory dir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
-
- char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8];
- Arrays.fill(chars, 'x');
- Document doc = new Document();
- final String bigTerm = new String(chars);
-
- // This produces a too-long term:
- String contents = "abc xyz x" + bigTerm + " another term";
- doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
- writer.addDocument(doc);
-
- // Make sure we can add another normal document
- doc = new Document();
- doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.close();
-
- IndexReader reader = IndexReader.open(dir, true);
-
- // Make sure all terms < max size were indexed
- assertEquals(2, reader.docFreq(new Term("content", "abc")));
- assertEquals(1, reader.docFreq(new Term("content", "bbb")));
- assertEquals(1, reader.docFreq(new Term("content", "term")));
- assertEquals(1, reader.docFreq(new Term("content", "another")));
-
- // Make sure position is still incremented when
- // massive term is skipped:
- TermPositions tps = reader.termPositions(new Term("content", "another"));
- assertTrue(tps.next());
- assertEquals(1, tps.freq());
- assertEquals(3, tps.nextPosition());
-
- // Make sure the doc that has the massive term is in
- // the index:
- assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
-
- reader.close();
-
- // Make sure we can add a document with exactly the
- // maximum length term, and search on that term:
- doc = new Document();
- doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
- StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
- sa.setMaxTokenLength(100000);
- writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
- writer.addDocument(doc);
- writer.close();
- reader = IndexReader.open(dir, true);
- assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
- reader.close();
-
- dir.close();
- }
-
public void testOptimizeMaxNumSegments() throws IOException {
MockRAMDirectory dir = new MockRAMDirectory();
diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java
index ece9a76640f..3f972347c01 100644
--- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java
+++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
index 511645c9292..3691b8775a9 100644
--- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
+++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
@@ -29,7 +29,6 @@ import java.util.HashSet;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@@ -255,8 +254,10 @@ public class TestQueryParser extends LocalizedTestCase {
assertQueryEquals("türm term term", new MockAnalyzer(), "türm term term");
assertQueryEquals("ümlaut", new MockAnalyzer(), "ümlaut");
- assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
- assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+ // FIXME: enhance MockAnalyzer to be able to support this
+ // it must no longer extend CharTokenizer
+ //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+ //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");
diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
index 474dd06c7ee..d76dc7133fd 100644
--- a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
+++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
@@ -19,8 +19,6 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
diff --git a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
index 48a6dec5768..c3724875191 100644
--- a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
+++ b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
@@ -21,19 +21,15 @@ import java.io.Reader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -44,7 +40,6 @@ import org.apache.lucene.index.TermPositions;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.index.Payload;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
@@ -52,9 +47,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Version;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
index fcb4d2a771b..e7a6064c2ed 100644
--- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
+++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
@@ -20,7 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -50,7 +51,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
super.setUp();
random = newRandom();
RAMDirectory dir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
+ IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(MockTokenizer.KEYWORD, false),
IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
diff --git a/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java
index bd755d4d26d..f928eb82b31 100755
--- a/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java
+++ b/lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
diff --git a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
index e1feda7a155..39fd3de7e76 100644
--- a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
+++ b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
diff --git a/modules/analysis/CHANGES.txt b/modules/analysis/CHANGES.txt
index 8e1b0db29b6..ec107a230e9 100644
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@@ -34,6 +34,16 @@ New Features
(... in progress)
* LUCENE-2413: Consolidated all Lucene analyzers into common.
+ - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
+ - o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
+ - o.a.l.analysis.LetterTokenizer -> o.a.l.analysis.core.LetterTokenizer
+ - o.a.l.analysis.LowerCaseFilter -> o.a.l.analysis.core.LowerCaseFilter
+ - o.a.l.analysis.LowerCaseTokenizer -> o.a.l.analysis.core.LowerCaseTokenizer
+ - o.a.l.analysis.SimpleAnalyzer -> o.a.l.analysis.core.SimpleAnalyzer
+ - o.a.l.analysis.StopAnalyzer -> o.a.l.analysis.core.StopAnalyzer
+ - o.a.l.analysis.StopFilter -> o.a.l.analysis.core.StopFilter
+ - o.a.l.analysis.WhitespaceAnalyzer -> o.a.l.analysis.core.WhitespaceAnalyzer
+ - o.a.l.analysis.WhitespaceTokenizer -> o.a.l.analysis.core.WhitespaceTokenizer
- o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter
- o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter
- o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter
@@ -44,6 +54,9 @@ New Features
- o.a.l.analysis.BaseCharFilter -> o.a.l.analysis.charfilter.BaseCharFilter
- o.a.l.analysis.MappingCharFilter -> o.a.l.analysis.charfilter.MappingCharFilter
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
+ - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+ - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
+ - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
... (in progress)
Build
diff --git a/modules/analysis/common/build.xml b/modules/analysis/common/build.xml
index 5344c71aef0..596c04c9f9d 100644
--- a/modules/analysis/common/build.xml
+++ b/modules/analysis/common/build.xml
@@ -38,7 +38,7 @@
* If you are unsure how exactly a regular expression should look like, consider
@@ -157,7 +157,7 @@ public final class PatternAnalyzer extends Analyzer {
* given stop set (after previously having applied toLowerCase()
* if applicable). For example, created via
* {@link StopFilter#makeStopSet(Version, String[])}and/or
- * {@link org.apache.lucene.analysis.WordlistLoader}as in
+ * {@link org.apache.lucene.analysis.util.WordlistLoader}as in
* WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")
* or other stop words
* lists .
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index 65db885116f..8224c6eca76 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -19,18 +19,18 @@ package org.apache.lucene.analysis.nl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import java.io.File;
@@ -171,7 +171,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
public DutchAnalyzer(Version matchVersion, File stopwords) {
// this is completely broken!
try {
- stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);
+ stoptable = org.apache.lucene.analysis.util.WordlistLoader.getWordSet(stopwords);
} catch (IOException e) {
// TODO: throw IOException
throw new RuntimeException(e);
@@ -208,7 +208,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
@Deprecated
public void setStemExclusionTable(File exclusionlist) {
try {
- excltable = org.apache.lucene.analysis.WordlistLoader.getWordSet(exclusionlist);
+ excltable = org.apache.lucene.analysis.util.WordlistLoader.getWordSet(exclusionlist);
setPreviousTokenStream(null); // force a new stemmer to be created
} catch (IOException e) {
// TODO: throw IOException
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index fcf3042eb8d..1ad31111f46 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -23,16 +23,16 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.NorwegianStemmer;
@@ -106,11 +106,11 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index 4a521c8f1e3..7bd77612c44 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -23,16 +23,16 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.PortugueseStemmer;
@@ -106,11 +106,11 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
index 3e454634ce5..41720e6333e 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@@ -21,7 +21,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index e065525a433..a06d222090c 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -23,15 +23,15 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.RomanianStemmer;
@@ -110,11 +110,11 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index 7d59b850559..1b94cdb0721 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -25,16 +25,16 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;
/**
@@ -161,10 +161,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
/**
* Creates
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
- * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
index 967c8eba105..1a244e4afaa 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ru;
import java.io.Reader;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
-import org.apache.lucene.analysis.LetterTokenizer; // for javadocs
+import org.apache.lucene.analysis.core.LetterTokenizer; // for javadocs
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
index 5beec03b830..6b96e16220d 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
-import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
index 3cdb5d208f3..11655a87a6b 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
@@ -17,8 +17,8 @@ package org.apache.lucene.analysis.ru;
* limitations under the License.
*/
+import org.apache.lucene.analysis.core.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
-import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
index a718472604b..ed0306b595f 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
@@ -18,6 +18,8 @@ package org.apache.lucene.analysis.snowball;
*/
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
index 52a49b55bf6..f2e8894c397 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@@ -21,10 +21,10 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter; // javadoc @link
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
-import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
/**
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
similarity index 94%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index a09ce1ff629..93e8ab85418 100644
--- a/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -18,6 +18,11 @@ package org.apache.lucene.analysis.standard;
*/
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import java.io.File;
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.java
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.java
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java
diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/package.html b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
similarity index 100%
rename from lucene/src/java/org/apache/lucene/analysis/standard/package.html
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index 49653c77755..dc59097c764 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -23,16 +23,16 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SwedishStemmer;
@@ -106,11 +106,11 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index d67ff98279c..f0eb1d6c7c6 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -18,16 +18,16 @@ package org.apache.lucene.analysis.th;
import java.io.Reader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
/**
@@ -46,10 +46,10 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
/**
* Creates
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
- * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link ThaiWordFilter}, and
* {@link StopFilter}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
index 831bba35c15..9751c1ac147 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@@ -24,7 +24,7 @@ import java.text.BreakIterator;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index b0d9a4750d4..313c0171368 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -23,14 +23,14 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.TurkishStemmer;
@@ -109,11 +109,11 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
diff --git a/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/ReusableAnalyzerBase.java
similarity index 96%
rename from lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/util/ReusableAnalyzerBase.java
index 2c3986a6281..ba0575d8a29 100644
--- a/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/ReusableAnalyzerBase.java
@@ -15,11 +15,16 @@
* limitations under the License.
*/
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
/**
* An convenience subclass of Analyzer that makes it easy to implement
* {@link TokenStream} reuse.
diff --git a/lucene/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
similarity index 95%
rename from lucene/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
index 4e6821205dd..8ff688600b9 100644
--- a/lucene/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
@@ -15,14 +15,14 @@
* limitations under the License.
*/
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
/**
diff --git a/lucene/src/java/org/apache/lucene/analysis/WordlistLoader.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
similarity index 99%
rename from lucene/src/java/org/apache/lucene/analysis/WordlistLoader.java
rename to modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
index ac8a2248b39..78aa03d6c4f 100644
--- a/lucene/src/java/org/apache/lucene/analysis/WordlistLoader.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/lucene/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
similarity index 98%
rename from lucene/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
rename to modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
index 509b7486942..7c594228770 100644
--- a/lucene/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
@@ -20,8 +20,8 @@ package org.apache.lucene.collation;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.text.Collator;
import java.io.Reader;
diff --git a/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java b/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java
similarity index 100%
rename from lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java
rename to modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java
diff --git a/lucene/src/java/org/apache/lucene/collation/package.html b/modules/analysis/common/src/java/org/apache/lucene/collation/package.html
similarity index 100%
rename from lucene/src/java/org/apache/lucene/collation/package.html
rename to modules/analysis/common/src/java/org/apache/lucene/collation/package.html
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
index e01bcbde966..dda2f2d22fd 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
@@ -22,8 +22,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
index 6b4d31ca3ec..89cdc103b23 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
@@ -23,8 +23,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
/**
* Test the Brazilian Stem Filter, which only modifies the term text.
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
index 08d567a3eab..448a5c70d70 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
index 7f125ad8a13..0f24353b0d4 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
@@ -24,7 +24,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
index dc7d0b3ad6d..769db62b876 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 9fc9f9780fc..28bfbf69572 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -25,8 +25,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
similarity index 81%
rename from lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
index e78832c7e13..febda6fe29b 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,11 +21,21 @@ import java.io.IOException;
import java.io.StringReader;
import java.io.Reader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.Version;
public class TestAnalyzers extends BaseTokenStreamTestCase {
@@ -214,6 +224,38 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
new String [] { "abac\uDC16adaba" });
}
+  public void testLowerCaseTokenizer() throws IOException {
+    // The surrogate pair \ud801\udc1c in the input is expected to come back as \ud801\udc44.
+    final String[] expected = { "tokenizer", "\ud801\udc44test" };
+    final StringReader input = new StringReader("Tokenizer \ud801\udc1ctest");
+    assertTokenStreamContents(
+        new LowerCaseTokenizer(TEST_VERSION_CURRENT, input), expected);
+  }
+
+  @Deprecated
+  public void testLowerCaseTokenizerBWCompat() throws IOException {
+    // With Version.LUCENE_30 the surrogate pair is expected to be absent from the second token.
+    final String[] expected = { "tokenizer", "test" };
+    final StringReader input = new StringReader("Tokenizer \ud801\udc1ctest");
+    assertTokenStreamContents(new LowerCaseTokenizer(Version.LUCENE_30, input), expected);
+  }
+
+  public void testWhitespaceTokenizer() throws IOException {
+    // Expected tokens keep their original case and the surrogate pair unchanged.
+    final String[] expected = { "Tokenizer", "\ud801\udc1ctest" };
+    final StringReader input = new StringReader("Tokenizer \ud801\udc1ctest");
+    assertTokenStreamContents(
+        new WhitespaceTokenizer(TEST_VERSION_CURRENT, input), expected);
+  }
+
+  @Deprecated
+  public void testWhitespaceTokenizerBWCompat() throws IOException {
+    // Built with Version.LUCENE_30; the expected tokens are the input words unchanged.
+    final String[] expected = { "Tokenizer", "\ud801\udc1ctest" };
+    final StringReader input = new StringReader("Tokenizer \ud801\udc1ctest");
+    assertTokenStreamContents(
+        new WhitespaceTokenizer(Version.LUCENE_30, input), expected);
+  }
}
final class PayloadSetter extends TokenFilter {
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
similarity index 93%
rename from lucene/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
index 712e917ade9..3baa63157d2 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis;
import java.io.StringReader;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -27,10 +30,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.RAMDirectory;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
@@ -43,8 +43,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
super.setUp();
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new SimpleAnalyzer(
- TEST_VERSION_CURRENT)));
+ TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)));
Document doc = new Document();
doc.add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.NOT_ANALYZED));
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
similarity index 77%
rename from lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
index 40c82391c48..803606f0877 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
@@ -1,7 +1,20 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
@@ -232,4 +245,64 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test\u02C6test" });
}
+  /**
+   * Make sure we skip wicked long terms while the rest of the document is still indexed.
+   */
+  public void testWickedLongTerm() throws IOException {
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+      TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+
+    char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
+    Arrays.fill(chars, 'x');
+    Document doc = new Document();
+    final String bigTerm = new String(chars);
+
+    // This produces a too-long term ("x" + bigTerm is one char over MAX_TERM_LENGTH):
+    String contents = "abc xyz x" + bigTerm + " another term";
+    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+
+    // Make sure we can add another normal document after the oversized one
+    doc = new Document();
+    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir, true);
+
+    // Make sure all terms < max size were indexed ("abc" occurs in both documents)
+    assertEquals(2, reader.docFreq(new Term("content", "abc")));
+    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
+    assertEquals(1, reader.docFreq(new Term("content", "term")));
+    assertEquals(1, reader.docFreq(new Term("content", "another")));
+
+    // Make sure position is still incremented when the massive term is
+    // skipped: "another" is the fourth token, so it is expected at position 3.
+    TermPositions tps = reader.termPositions(new Term("content", "another"));
+    assertTrue(tps.next());
+    assertEquals(1, tps.freq());
+    assertEquals(3, tps.nextPosition());
+
+    // Make sure the doc that has the massive term is in
+    // the index:
+    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());
+
+    reader.close();
+
+    // Make sure we can add a document with exactly the
+    // maximum length term, and search on that term:
+    doc = new Document();
+    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
+    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
+    sa.setMaxTokenLength(100000); // presumably lifts the analyzer's own token cap so bigTerm survives analysis
+    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
+    writer.addDocument(doc);
+    writer.close();
+    reader = IndexReader.open(dir, true);
+    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
+    reader.close();
+
+    dir.close();
+  }
}
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
similarity index 95%
rename from lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
index 4eb35df33f7..a453d62ec8d 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,6 +17,9 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
diff --git a/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
similarity index 95%
rename from lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
index ec989a51817..c17843f6903 100644
--- a/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
/**
* Copyright 2005 The Apache Software Foundation
@@ -16,6 +16,10 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.English;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
index d2e9b522c73..75bb06057d4 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
@@ -22,8 +22,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test the Czech Stemmer.
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
index 8b85b27ef85..94574ca4f2d 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
@@ -23,8 +23,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.util.Version;
public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
index 213be5e6bb9..0376ff5bebe 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@@ -24,10 +24,10 @@ import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
/**
* Test the German stemmer. The stemming algorithm is known to work less
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
index 613f540e778..96a61221c2c 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
@@ -26,11 +26,11 @@ import java.util.zip.ZipFile;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Test the PorterStemFilter with Martin Porter's test data.
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
index 42c478163d8..8d5195dc897 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiNormalizer.java
@@ -23,7 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test HindiNormalizer
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
index cce0015cf67..06cda993915 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiStemmer.java
@@ -23,7 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test HindiStemmer
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java
index 09c3c94252b..3d8468db5f7 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianStemmer.java
@@ -22,9 +22,9 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
/**
* Tests {@link IndonesianStemmer}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
index b1ffd9b4b50..1d586d18493 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/in/TestIndicNormalizer.java
@@ -23,7 +23,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test IndicNormalizer
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
index a3464706916..a0a59f661ab 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
@@ -22,8 +22,8 @@ import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.StopAnalyzer;
/**
* Verifies the behavior of PatternAnalyzer.
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
index d76954f8bc5..824a8995a51 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
import java.util.List;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
index 5545fa8a24f..a51fa227435 100755
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
@@ -21,7 +21,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* HyphenatedWordsFilter test
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java
index 4225ecf0a35..8092810b2f6 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestISOLatin1AccentFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
index bea2a485503..e1ebf7d2bfc 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
@@ -23,7 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/** Test {@link KeepWordFilter} */
public class TestKeepWordFilter extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
index b24112b0b9c..9500c40356a 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@@ -10,7 +10,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
index 010110c6178..de8b7311d19 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
index a06bac83a9f..fa2c51d2d1a 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalzyerWrapper.java
@@ -3,6 +3,8 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.StringReader;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
index 1fe55e37a0a..a266fff3395 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
index b10fc739b8e..c7c9ae5efba 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.IOException;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
index bf9e1290d7c..463faf403a1 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@@ -6,7 +6,7 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
index e04a4694e42..e17d6936269 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
@@ -20,12 +20,12 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
index 346af2a353a..903e254408f 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram;
*/
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
index ed7c1701e52..9443a25976f 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram;
*/
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.StringReader;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
index 071d6019c06..56418b5aadb 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Tests {@link PatternReplaceCharFilter}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java
index 41e664b6a31..56e7da392ea 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import java.io.StringReader;
import java.util.regex.Pattern;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
index 1e8970bbbfc..dc1e53fb5de 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.payloads;
*/
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
index a0f479e6edc..7cc9a4a56d5 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.payloads;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
index e503395e325..371e45c1b16 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.payloads;
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
index b07bd72d79c..aacebe85894 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.payloads;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
index 2993dcf21e6..7ed432a9ddf 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
@@ -22,10 +22,10 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
index fe1a3197bcc..b55b7353a30 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.reverse;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index 6cda8139b66..ba7346c5889 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -22,10 +22,10 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
index d5c43f792bb..34ffcb8b457 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
public class ShingleFilterTest extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
index c715df04554..363b97dfc0e 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
@@ -24,6 +24,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
index 7479a6d680f..344fbded5ff 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
@@ -22,7 +22,7 @@ import java.text.SimpleDateFormat;
import java.util.Locale;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
index 54adc3b7823..57e7831761a 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
@@ -19,11 +19,11 @@ package org.apache.lucene.analysis.sinks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
index 8e993682c02..10d9cc4fe67 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
index 6455e73c06c..bb3fe3c546a 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
@@ -22,7 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
index fd7a3a1611e..22790cc7dca 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
@@ -25,9 +25,9 @@ import java.io.StringReader;
import java.util.zip.ZipFile;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
/**
* Test the snowball filters against the snowball data tests
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
index 25e23cdb698..a8cbff57a7d 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
index c2d1b1ddd0f..3885c36f881 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
@@ -21,7 +21,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test the Turkish lowercase filter.
diff --git a/lucene/src/test/org/apache/lucene/index/TestWordlistLoader.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
similarity index 96%
rename from lucene/src/test/org/apache/lucene/index/TestWordlistLoader.java
rename to modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
index 07305239e3f..74356c42828 100644
--- a/lucene/src/test/org/apache/lucene/index/TestWordlistLoader.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.index;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,7 +25,7 @@ import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.util.WordlistLoader;
public class TestWordlistLoader extends LuceneTestCase {
diff --git a/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
similarity index 100%
rename from lucene/src/test/org/apache/lucene/collation/CollationTestBase.java
rename to modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
diff --git a/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
similarity index 100%
rename from lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
rename to modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
diff --git a/lucene/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
similarity index 98%
rename from lucene/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
rename to modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
index 533242baac0..543c0efe9a6 100644
--- a/lucene/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
@@ -20,7 +20,7 @@ package org.apache.lucene.collation;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import java.text.Collator;
import java.util.Locale;
diff --git a/modules/analysis/icu/build.xml b/modules/analysis/icu/build.xml
index a415ab70be1..6e0e64df880 100644
--- a/modules/analysis/icu/build.xml
+++ b/modules/analysis/icu/build.xml
@@ -38,6 +38,30 @@