diff --git a/gradle/java/modules.gradle b/gradle/java/modules.gradle
index 0855423c983..5e334ab2db2 100644
--- a/gradle/java/modules.gradle
+++ b/gradle/java/modules.gradle
@@ -214,7 +214,7 @@ allprojects {
   }
 
   // Configure (tasks.test, sourceSets.test)
-  tasks.matching { it.name == "test" }.all { Test task ->
+  tasks.matching { it.name ==~ /test(_[0-9]+)?/ }.all { Test task ->
     configureTestTaskForSourceSet(task, task.project.sourceSets.test)
   }
 
diff --git a/gradle/validation/rat-sources.gradle b/gradle/validation/rat-sources.gradle
index 3bc0d35c660..4d9759188cb 100644
--- a/gradle/validation/rat-sources.gradle
+++ b/gradle/validation/rat-sources.gradle
@@ -102,6 +102,7 @@ allprojects {
             break
 
           case ":lucene:analysis:common":
+          case ":lucene:analysis.tests":
             exclude "src/**/*.aff"
             exclude "src/**/*.dic"
             exclude "src/**/*.good"
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a964e732160..8d18a409ca6 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -153,6 +153,12 @@ Bug Fixes
 * LUCENE-10349: Fix all analyzers to behave according to their documentation:
   getDefaultStopSet() methods now return unmodifiable CharArraySets.
   (Uwe Schindler)
 
+* LUCENE-10352: Add missing service provider entries: KoreanNumberFilterFactory,
+  DaitchMokotoffSoundexFilterFactory (Uwe Schindler, Robert Muir)
+
+* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
+  DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
+
 Other
 ---------------------
 
@@ -163,6 +169,13 @@ Other
 * LUCENE-10310: TestXYDocValuesQueries#doRandomDistanceTest does not produce
   random circles with radius with '0' value any longer.
 
+* LUCENE-10352: Removed duplicate instances of StringMockResourceLoader and migrated class to
+  test-framework. (Uwe Schindler, Robert Muir)
+
+* LUCENE-10352: Convert TestAllAnalyzersHaveFactories and TestRandomChains to a global integration test
+  and discover classes to check from module system. The test now checks all analyzer modules,
+  so it may discover new bugs outside of analysis:common module. (Uwe Schindler, Robert Muir)
+
 ======================= Lucene 9.0.0 =======================
 
 New Features
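The two Bug Fixes entries above exist because analysis factories are discovered at runtime through the JDK service-provider mechanism: a factory that is not listed in `META-INF/services` is silently invisible to name-based lookup. A minimal sketch of that lookup path (not part of this patch; it assumes the nori module is on the classpath and that `koreanNumber` is the factory's SPI name):

```java
import java.util.HashMap;
import org.apache.lucene.analysis.TokenFilterFactory;

public class SpiLookupSketch {
  public static void main(String[] args) {
    // Before this patch, a lookup like the one below failed with an
    // IllegalArgumentException, because KoreanNumberFilterFactory was missing
    // from META-INF/services/org.apache.lucene.analysis.TokenFilterFactory.
    // The args map must be mutable: factories consume entries from it.
    TokenFilterFactory factory = TokenFilterFactory.forName("koreanNumber", new HashMap<>());
    System.out.println(factory.getClass().getName());
  }
}
```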
diff --git a/lucene/analysis.tests/build.gradle b/lucene/analysis.tests/build.gradle
new file mode 100644
index 00000000000..be1c51fecac
--- /dev/null
+++ b/lucene/analysis.tests/build.gradle
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+apply plugin: 'java-library'
+
+description = 'Module integration tests for all :lucene:analysis modules'
+
+dependencies {
+  moduleTestImplementation project(':lucene:analysis:common')
+  moduleTestImplementation project(':lucene:analysis:icu')
+  moduleTestImplementation project(':lucene:analysis:kuromoji')
+  moduleTestImplementation project(':lucene:analysis:morfologik')
+  moduleTestImplementation project(':lucene:analysis:nori')
+  moduleTestImplementation project(':lucene:analysis:opennlp')
+  moduleTestImplementation project(':lucene:analysis:phonetic')
+  moduleTestImplementation project(':lucene:analysis:smartcn')
+  moduleTestImplementation project(':lucene:analysis:stempel')
+  moduleTestImplementation project(':lucene:test-framework')
+}
diff --git a/lucene/analysis.tests/src/test/module-info.java b/lucene/analysis.tests/src/test/module-info.java
new file mode 100644
index 00000000000..502611624a0
--- /dev/null
+++ b/lucene/analysis.tests/src/test/module-info.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test module for global integration tests of all {@code org.apache.lucene.analysis}
+ * packages/modules.
+ */
+@SuppressWarnings({"requires-automatic"})
+module org.apache.lucene.analysis.tests {
+  requires java.xml;
+  requires org.apache.lucene.core;
+  requires org.apache.lucene.analysis.common;
+  requires org.apache.lucene.analysis.icu;
+  requires org.apache.lucene.analysis.kuromoji;
+  requires org.apache.lucene.analysis.morfologik;
+  requires org.apache.lucene.analysis.nori;
+  requires org.apache.lucene.analysis.opennlp;
+  requires org.apache.lucene.analysis.phonetic;
+  requires org.apache.lucene.analysis.smartcn;
+  requires org.apache.lucene.analysis.stempel;
+  requires org.apache.lucene.test_framework;
+  requires junit;
+
+  exports org.apache.lucene.analysis.tests;
+}
diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/ModuleClassDiscovery.java b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/ModuleClassDiscovery.java
new file mode 100644
index 00000000000..28b90e58691
--- /dev/null
+++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/ModuleClassDiscovery.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.tests;
+
+import java.io.IOException;
+import java.lang.module.ResolvedModule;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.function.Predicate;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.junit.Assert;
+
+/** Discovers all classes from the module graph and loads them (without initialization) */
+abstract class ModuleClassDiscovery {
+
+  private static final Module THIS_MODULE = ModuleClassDiscovery.class.getModule();
+  private static final ModuleLayer LAYER = THIS_MODULE.getLayer();
+  private static final SortedMap<String, ResolvedModule> ALL_ANALYSIS_MODULES;
+
+  private static final Predicate<String> ALLOW_MODULES =
+      name ->
+          name.equals("org.apache.lucene.core") || name.startsWith("org.apache.lucene.analysis.");
+
+  static {
+    Assert.assertTrue(
+        "Analysis integration tests must run in Java Module System as named module",
+        THIS_MODULE.isNamed());
+    Assert.assertNotNull("Module layer is missing", LAYER);
+
+    var mods = new TreeMap<String, ResolvedModule>();
+    discoverAnalysisModules(LAYER, mods);
+    ALL_ANALYSIS_MODULES = Collections.unmodifiableSortedMap(mods);
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println(
+          "Discovered the following analysis modules: " + ALL_ANALYSIS_MODULES.keySet());
+    }
+  }
+
+  private static void discoverAnalysisModules(
+      ModuleLayer layer, Map<String, ResolvedModule> result) {
+    for (var mod : layer.configuration().modules()) {
+      String name = mod.name();
+      if (ALLOW_MODULES.test(name) && !Objects.equals(name, THIS_MODULE.getName())) {
+        result.put(name, mod);
+      }
+    }
+    for (var parent : layer.parents()) {
+      discoverAnalysisModules(parent, result);
+    }
+  }
+
+  /** Finds all classes in package across all analysis modules */
+  public static List<Class<?>> getClassesForPackage(String pkgname) throws IOException {
+    final var prefix = pkgname.concat(".");
+    final var classes = new ArrayList<Class<?>>();
+    for (var resolvedModule : ALL_ANALYSIS_MODULES.values()) {
+      final var module = LAYER.findModule(resolvedModule.name()).orElseThrow();
+      try (var reader = resolvedModule.reference().open()) {
+        reader
+            .list()
+            .filter(entry -> entry.endsWith(".class"))
+            .map(entry -> entry.substring(0, entry.length() - 6).replace('/', '.'))
+            .filter(clazzname -> clazzname.startsWith(prefix))
+            .sorted()
+            .map(
+                clazzname ->
+                    Objects.requireNonNull(
+                        Class.forName(module, clazzname),
+                        "Class '"
+                            + clazzname
+                            + "' not found in module '"
+                            + module.getName()
+                            + "'"))
+            .forEach(classes::add);
+      }
+    }
+    Assert.assertFalse("No classes found in package:" + pkgname, classes.isEmpty());
+    return classes;
+  }
+}
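ModuleClassDiscovery walks the module layer's configuration instead of scanning the classpath, which is what lets one test see classes from every analysis module. A stripped-down sketch of the same idea (illustrative names, not the test's code; it assumes the class runs as a named module, since `getLayer()` returns null for class-path code):

```java
import java.io.IOException;
import java.lang.module.ResolvedModule;

public class ListModuleClasses {
  public static void main(String[] args) throws IOException {
    // Only works for named modules; on the class path getLayer() is null.
    ModuleLayer layer = ListModuleClasses.class.getModule().getLayer();
    for (ResolvedModule mod : layer.configuration().modules()) {
      // A ModuleReader lists every resource in a module; *.class entries
      // translate 1:1 into loadable binary class names.
      try (var reader = mod.reference().open()) {
        reader
            .list()
            .filter(entry -> entry.endsWith(".class"))
            .map(entry -> entry.substring(0, entry.length() - 6).replace('/', '.'))
            .forEach(name -> System.out.println(mod.name() + ": " + name));
      }
    }
  }
}
```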
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestAllAnalyzersHaveFactories.java
similarity index 69%
rename from lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
rename to lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestAllAnalyzersHaveFactories.java
index 945177b69af..c7df6e16f93 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestAllAnalyzersHaveFactories.java
@@ -14,15 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.tests;
 
 import java.io.Reader;
 import java.io.StringReader;
 import java.lang.reflect.Modifier;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -34,27 +31,17 @@ import org.apache.lucene.analysis.TokenFilterFactory;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.TokenizerFactory;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.UnicodeWhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.sr.SerbianNormalizationRegularFilter;
-import org.apache.lucene.analysis.util.StringMockResourceLoader;
-import org.apache.lucene.tests.analysis.CrankyTokenFilter;
-import org.apache.lucene.tests.analysis.MockCharFilter;
-import org.apache.lucene.tests.analysis.MockFixedLengthPayloadFilter;
-import org.apache.lucene.tests.analysis.MockGraphTokenFilter;
-import org.apache.lucene.tests.analysis.MockHoleInjectingTokenFilter;
-import org.apache.lucene.tests.analysis.MockLowerCaseFilter;
-import org.apache.lucene.tests.analysis.MockRandomLookaheadTokenFilter;
-import org.apache.lucene.tests.analysis.MockSynonymFilter;
-import org.apache.lucene.tests.analysis.MockTokenFilter;
-import org.apache.lucene.tests.analysis.MockTokenizer;
-import org.apache.lucene.tests.analysis.MockVariableLengthPayloadFilter;
-import org.apache.lucene.tests.analysis.SimplePayloadFilter;
-import org.apache.lucene.tests.analysis.ValidatingTokenFilter;
+import org.apache.lucene.analysis.stempel.StempelFilter;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.StringMockResourceLoader;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
 import org.apache.lucene.util.Version;
@@ -65,71 +52,37 @@ import org.apache.lucene.util.Version;
  */
 public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
 
-  // these are test-only components (e.g. test-framework)
-  private static final Set<Class<?>> testComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        testComponents,
-        MockTokenizer.class,
-        MockCharFilter.class,
-        MockFixedLengthPayloadFilter.class,
-        MockGraphTokenFilter.class,
-        MockHoleInjectingTokenFilter.class,
-        MockLowerCaseFilter.class,
-        MockRandomLookaheadTokenFilter.class,
-        MockSynonymFilter.class,
-        MockTokenFilter.class,
-        MockVariableLengthPayloadFilter.class,
-        ValidatingTokenFilter.class,
-        CrankyTokenFilter.class,
-        SimplePayloadFilter.class);
-  }
-
   // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for
   // these?
   private static final Set<Class<?>> crazyComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        crazyComponents, CachingTokenFilter.class, TeeSinkTokenFilter.class);
-  }
+      Set.of(CachingTokenFilter.class, TeeSinkTokenFilter.class);
 
   // these are oddly-named (either the actual analyzer, or its factory)
   // they do actually have factories.
   // TODO: clean this up!
   private static final Set<Class<?>> oddlyNamedComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        oddlyNamedComponents,
-        // this is supported via an option to PathHierarchyTokenizer's factory
-        ReversePathHierarchyTokenizer.class,
-        SnowballFilter.class, // this is called SnowballPorterFilterFactory
-        PatternKeywordMarkerFilter.class,
-        SetKeywordMarkerFilter.class,
-        UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
-        // class from core, but StopFilterFactory creates one from this module
-        org.apache.lucene.analysis.StopFilter.class,
-        // class from core, but LowerCaseFilterFactory creates one from this module
-        org.apache.lucene.analysis.LowerCaseFilter.class);
-  }
+      Set.of(
+          // this is supported via an option to PathHierarchyTokenizer's factory
+          ReversePathHierarchyTokenizer.class,
+          SnowballFilter.class, // this is called SnowballPorterFilterFactory
+          StempelFilter.class, // this is called StempelPolishStemFilterFactory
+          PatternKeywordMarkerFilter.class,
+          SetKeywordMarkerFilter.class,
+          UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
+          // class from core, but StopFilterFactory creates one from this module
+          org.apache.lucene.analysis.StopFilter.class,
+          // class from core, but LowerCaseFilterFactory creates one from this module
+          org.apache.lucene.analysis.LowerCaseFilter.class);
 
   // The following token filters are excused from having their factory.
-  private static final Set<Class<?>> tokenFiltersWithoutFactory = new HashSet<>();
-
-  static {
-    tokenFiltersWithoutFactory.add(SerbianNormalizationRegularFilter.class);
-  }
+  private static final Set<Class<?>> tokenFiltersWithoutFactory =
+      Set.of(SerbianNormalizationRegularFilter.class);
 
   private static final ResourceLoader loader = new StringMockResourceLoader("");
 
   public void test() throws Exception {
     List<Class<?>> analysisClasses =
-        TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
+        ModuleClassDiscovery.getClassesForPackage("org.apache.lucene.analysis");
 
     for (final Class<?> c : analysisClasses) {
       final int modifiers = c.getModifiers();
@@ -141,7 +94,6 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
           || c.isAnonymousClass()
           || c.isMemberClass()
           || c.isInterface()
-          || testComponents.contains(c)
           || crazyComponents.contains(c)
           || oddlyNamedComponents.contains(c)
           || tokenFiltersWithoutFactory.contains(c)
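The test above approaches the invariant from the class side: every concrete analysis component must be reachable through some factory. The factory side of the same SPI registry can be inspected directly; an illustrative loop, not part of the patch:

```java
import org.apache.lucene.analysis.TokenFilterFactory;

public class ListTokenFilterFactories {
  public static void main(String[] args) {
    // availableTokenFilters() reflects exactly the META-INF/services entries
    // visible at runtime, which is why the entries fixed by this patch also
    // made their components invisible here.
    for (String name : TokenFilterFactory.availableTokenFilters()) {
      System.out.println(name + " -> " + TokenFilterFactory.lookupClass(name).getName());
    }
  }
}
```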
diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java
new file mode 100644
index 00000000000..208c882532c
--- /dev/null
+++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java
@@ -0,0 +1,961 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.tests;
+
+import com.ibm.icu.text.Normalizer2;
+import com.ibm.icu.text.Transliterator;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Modifier;
+import java.text.DateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.commons.codec.Encoder;
+import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.ColognePhonetic;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
+import org.apache.commons.codec.language.bm.PhoneticEngine;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
+import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+import org.apache.lucene.analysis.core.FlattenGraphFilter;
+import org.apache.lucene.analysis.hunspell.Dictionary;
+import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerConfig;
+import org.apache.lucene.analysis.ja.JapaneseCompletionFilter;
+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
+import org.apache.lucene.analysis.ko.KoreanTokenizer;
+import org.apache.lucene.analysis.minhash.MinHashFilter;
+import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
+import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
+import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.analysis.pattern.PatternTypingFilter;
+import org.apache.lucene.analysis.payloads.IdentityEncoder;
+import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.pl.PolishAnalyzer;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
+import org.apache.lucene.analysis.shingle.ShingleFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.tests.analysis.MockTokenFilter;
+import org.apache.lucene.tests.analysis.MockTokenizer;
+import org.apache.lucene.tests.analysis.ValidatingTokenFilter;
+import org.apache.lucene.tests.util.Rethrow;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IgnoreRandomChains;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.RegExp;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.tartarus.snowball.SnowballStemmer;
+import org.xml.sax.InputSource;
+
+/** tests random analysis chains */
+public class TestRandomChains extends BaseTokenStreamTestCase {
+
+  static List<Constructor<? extends Tokenizer>> tokenizers;
+  static List<Constructor<? extends TokenFilter>> tokenfilters;
+  static List<Constructor<? extends CharFilter>> charfilters;
+
+  static List<Class<? extends SnowballStemmer>> snowballStemmers;
+
+  private static final Set<Class<?>> avoidConditionals =
+      Set.of(
+          FingerprintFilter.class,
+          MinHashFilter.class,
+          ConcatenateGraphFilter.class,
+          // ShingleFilter doesn't handle input graphs correctly, so wrapping it in a condition can
+          // expose inconsistent offsets
+          // https://issues.apache.org/jira/browse/LUCENE-4170
+          ShingleFilter.class,
+          FixedShingleFilter.class,
+          // FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
+          // can break position lengths
+          FlattenGraphFilter.class,
+          // LimitToken*Filters don't set end offsets correctly
+          LimitTokenOffsetFilter.class,
+          LimitTokenCountFilter.class,
+          LimitTokenPositionFilter.class);
+
+  private static final Map<Constructor<?>, Predicate<Object[]>> brokenConstructors;
+
+  static {
+    try {
+      final Map<Constructor<?>, Predicate<Object[]>> map = new HashMap<>();
+      // LimitToken*Filter can only use special ctor when last arg is true
+      for (final var c :
+          List.of(
+              LimitTokenCountFilter.class,
+              LimitTokenOffsetFilter.class,
+              LimitTokenPositionFilter.class)) {
+        map.put(
+            c.getConstructor(TokenStream.class, int.class, boolean.class),
+            args -> {
+              assert args.length == 3;
+              return false == ((Boolean) args[2]); // args are broken if consumeAllTokens is false
+            });
+      }
+      brokenConstructors = Collections.unmodifiableMap(map);
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  private static final Map<Class<?>, Function<Random, Object>> argProducers =
+      Collections.unmodifiableMap(
+          new IdentityHashMap<Class<?>, Function<Random, Object>>() {
+            {
+              put(
+                  int.class,
+                  random -> {
+                    // TODO: could cause huge ram usage to use full int range for some filters
+                    // (e.g. allocate enormous arrays)
+                    // return Integer.valueOf(random.nextInt());
+                    return Integer.valueOf(TestUtil.nextInt(random, -50, 50));
+                  });
+              put(
+                  char.class,
+                  random -> {
+                    // TODO: fix any filters that care to throw IAE instead.
+                    // also add a unicode validating filter to validate termAtt?
+ // return Character.valueOf((char)random.nextInt(65536)); + while (true) { + char c = (char) random.nextInt(65536); + if (c < '\uD800' || c > '\uDFFF') { + return Character.valueOf(c); + } + } + }); + put(float.class, Random::nextFloat); + put(boolean.class, Random::nextBoolean); + put(byte.class, random -> (byte) random.nextInt(256)); + put( + byte[].class, + random -> { + byte[] bytes = new byte[random.nextInt(256)]; + random.nextBytes(bytes); + return bytes; + }); + put(Random.class, random -> new Random(random.nextLong())); + put(Version.class, random -> Version.LATEST); + put(AttributeFactory.class, BaseTokenStreamTestCase::newAttributeFactory); + put(AttributeSource.class, random -> null); // force IAE/NPE + put( + Set.class, + random -> { + // TypeTokenFilter + Set set = new HashSet<>(); + int num = random.nextInt(5); + for (int i = 0; i < num; i++) { + set.add( + StandardTokenizer.TOKEN_TYPES[ + random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]); + } + return set; + }); + put( + Collection.class, + random -> { + // CapitalizationFilter + Collection col = new ArrayList<>(); + int num = random.nextInt(5); + for (int i = 0; i < num; i++) { + col.add(TestUtil.randomSimpleString(random).toCharArray()); + } + return col; + }); + put( + CharArraySet.class, + random -> { + int num = random.nextInt(10); + CharArraySet set = new CharArraySet(num, random.nextBoolean()); + for (int i = 0; i < num; i++) { + // TODO: make nastier + set.add(TestUtil.randomSimpleString(random)); + } + return set; + }); + // TODO: don't want to make the exponentially slow ones Dawid documents + // in TestPatternReplaceFilter, so dont use truly random patterns (for now) + put(Pattern.class, random -> Pattern.compile("a")); + put( + Pattern[].class, + random -> + new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")}); + put( + PayloadEncoder.class, + random -> + new IdentityEncoder()); // the other encoders will throw exceptions if tokens + // arent numbers? 
+ put( + Dictionary.class, + random -> { + // TODO: make nastier + InputStream affixStream = + TestRandomChains.class.getResourceAsStream("simple.aff"); + InputStream dictStream = + TestRandomChains.class.getResourceAsStream("simple.dic"); + try { + return new Dictionary( + new ByteBuffersDirectory(), "dictionary", affixStream, dictStream); + } catch (Exception ex) { + Rethrow.rethrow(ex); + return null; // unreachable code + } + }); + put( + HyphenationTree.class, + random -> { + // TODO: make nastier + try { + InputSource is = + new InputSource( + TestRandomChains.class.getResource("da_UTF8.xml").toExternalForm()); + HyphenationTree hyphenator = + HyphenationCompoundWordTokenFilter.getHyphenationTree(is); + return hyphenator; + } catch (Exception ex) { + Rethrow.rethrow(ex); + return null; // unreachable code + } + }); + put( + SnowballStemmer.class, + random -> { + try { + var clazz = snowballStemmers.get(random.nextInt(snowballStemmers.size())); + return clazz.getConstructor().newInstance(); + } catch (Exception ex) { + Rethrow.rethrow(ex); + return null; // unreachable code + } + }); + put( + String.class, + random -> { + // TODO: make nastier + if (random.nextBoolean()) { + // a token type + return StandardTokenizer.TOKEN_TYPES[ + random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]; + } else { + return TestUtil.randomSimpleString(random); + } + }); + put( + NormalizeCharMap.class, + random -> { + NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); + // we can't add duplicate keys, or NormalizeCharMap gets angry + Set keys = new HashSet<>(); + int num = random.nextInt(5); + // System.out.println("NormalizeCharMap="); + for (int i = 0; i < num; i++) { + String key = TestUtil.randomSimpleString(random); + if (!keys.contains(key) && key.length() > 0) { + String value = TestUtil.randomSimpleString(random); + builder.add(key, value); + keys.add(key); + // System.out.println("mapping: '" + key + "' => '" + value + "'"); + } + } + return builder.build(); + }); + put( + CharacterRunAutomaton.class, + random -> { + // TODO: could probably use a purely random automaton + switch (random.nextInt(5)) { + case 0: + return MockTokenizer.KEYWORD; + case 1: + return MockTokenizer.SIMPLE; + case 2: + return MockTokenizer.WHITESPACE; + case 3: + return MockTokenFilter.EMPTY_STOPSET; + default: + return MockTokenFilter.ENGLISH_STOPSET; + } + }); + put( + CharArrayMap.class, + random -> { + int num = random.nextInt(10); + CharArrayMap map = new CharArrayMap<>(num, random.nextBoolean()); + for (int i = 0; i < num; i++) { + // TODO: make nastier + map.put( + TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); + } + return map; + }); + put( + StemmerOverrideMap.class, + random -> { + int num = random.nextInt(10); + StemmerOverrideFilter.Builder builder = + new StemmerOverrideFilter.Builder(random.nextBoolean()); + for (int i = 0; i < num; i++) { + String input = ""; + do { + input = TestUtil.randomRealisticUnicodeString(random); + } while (input.isEmpty()); + String out = ""; + TestUtil.randomSimpleString(random); + do { + out = TestUtil.randomRealisticUnicodeString(random); + } while (out.isEmpty()); + builder.add(input, out); + } + try { + return builder.build(); + } catch (Exception ex) { + Rethrow.rethrow(ex); + return null; // unreachable code + } + }); + put( + SynonymMap.class, + new Function() { + @Override + public Object apply(Random random) { + SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean()); + final int numEntries = atLeast(10); + for 
(int j = 0; j < numEntries; j++) { + addSyn( + b, + randomNonEmptyString(random), + randomNonEmptyString(random), + random.nextBoolean()); + } + try { + return b.build(); + } catch (Exception ex) { + Rethrow.rethrow(ex); + return null; // unreachable code + } + } + + private void addSyn( + SynonymMap.Builder b, String input, String output, boolean keepOrig) { + b.add( + new CharsRef(input.replaceAll(" +", "\u0000")), + new CharsRef(output.replaceAll(" +", "\u0000")), + keepOrig); + } + + private String randomNonEmptyString(Random random) { + while (true) { + final String s = TestUtil.randomUnicodeString(random).trim(); + if (s.length() != 0 && s.indexOf('\u0000') == -1) { + return s; + } + } + } + }); + put( + DateFormat.class, + random -> { + if (random.nextBoolean()) return null; + return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random)); + }); + put( + Automaton.class, + random -> { + return Operations.determinize( + new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE) + .toAutomaton(), + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); + }); + put( + PatternTypingFilter.PatternTypingRule[].class, + random -> { + int numRules = TestUtil.nextInt(random, 1, 3); + PatternTypingFilter.PatternTypingRule[] patternTypingRules = + new PatternTypingFilter.PatternTypingRule[numRules]; + for (int i = 0; i < patternTypingRules.length; i++) { + String s = TestUtil.randomSimpleString(random, 1, 2); + // random regex with one group + String regex = s + "(.*)"; + // pattern rule with a template that accepts one group. + patternTypingRules[i] = + new PatternTypingFilter.PatternTypingRule( + Pattern.compile(regex), TestUtil.nextInt(random, 1, 8), s + "_$1"); + } + return patternTypingRules; + }); + + // ICU: + put( + Normalizer2.class, + random -> { + switch (random.nextInt(5)) { + case 0: + return Normalizer2.getNFCInstance(); + case 1: + return Normalizer2.getNFDInstance(); + case 2: + return Normalizer2.getNFKCInstance(); + case 3: + return Normalizer2.getNFKDInstance(); + default: + return Normalizer2.getNFKCCasefoldInstance(); + } + }); + final var icuTransliterators = Collections.list(Transliterator.getAvailableIDs()); + Collections.sort(icuTransliterators); + put( + Transliterator.class, + random -> + Transliterator.getInstance( + icuTransliterators.get(random.nextInt(icuTransliterators.size())))); + put( + ICUTokenizerConfig.class, + random -> + new DefaultICUTokenizerConfig(random.nextBoolean(), random.nextBoolean())); + + // Kuromoji: + final var jaComplFilterModes = JapaneseCompletionFilter.Mode.values(); + put( + JapaneseCompletionFilter.Mode.class, + random -> jaComplFilterModes[random.nextInt(jaComplFilterModes.length)]); + final var jaTokModes = JapaneseTokenizer.Mode.values(); + put( + JapaneseTokenizer.Mode.class, + random -> jaTokModes[random.nextInt(jaTokModes.length)]); + put(org.apache.lucene.analysis.ja.dict.UserDictionary.class, random -> null); + + // Nori: + final var koComplFilterModes = KoreanTokenizer.DecompoundMode.values(); + put( + KoreanTokenizer.DecompoundMode.class, + random -> koComplFilterModes[random.nextInt(koComplFilterModes.length)]); + put(org.apache.lucene.analysis.ko.dict.UserDictionary.class, random -> null); + + // Phonetic: + final var bmNameTypes = org.apache.commons.codec.language.bm.NameType.values(); + final var bmRuleTypes = + Stream.of(org.apache.commons.codec.language.bm.RuleType.values()) + .filter(e -> e != org.apache.commons.codec.language.bm.RuleType.RULES) + .toArray(org.apache.commons.codec.language.bm.RuleType[]::new); + 
put( + PhoneticEngine.class, + random -> + new PhoneticEngine( + bmNameTypes[random.nextInt(bmNameTypes.length)], + bmRuleTypes[random.nextInt(bmRuleTypes.length)], + random.nextBoolean())); + put( + Encoder.class, + random -> { + switch (random.nextInt(7)) { + case 0: + return new DoubleMetaphone(); + case 1: + return new Metaphone(); + case 2: + return new Soundex(); + case 3: + return new RefinedSoundex(); + case 4: + return new Caverphone2(); + case 5: + return new ColognePhonetic(); + default: + return new Nysiis(); + } + }); + + // Stempel + put( + StempelStemmer.class, + random -> new StempelStemmer(PolishAnalyzer.getDefaultTable())); + } + }); + + static final Set> allowedTokenizerArgs = argProducers.keySet(), + allowedTokenFilterArgs = + union(argProducers.keySet(), List.of(TokenStream.class, CommonGramsFilter.class)), + allowedCharFilterArgs = union(argProducers.keySet(), List.of(Reader.class)); + + @BeforeClass + public static void beforeClass() throws Exception { + List> analysisClasses = + ModuleClassDiscovery.getClassesForPackage("org.apache.lucene.analysis"); + tokenizers = new ArrayList<>(); + tokenfilters = new ArrayList<>(); + charfilters = new ArrayList<>(); + for (final Class c : analysisClasses) { + final int modifiers = c.getModifiers(); + if ( + // don't waste time with abstract classes, deprecated, or @IgnoreRandomChains annotated + // classes: + Modifier.isAbstract(modifiers) + || !Modifier.isPublic(modifiers) + || c.isSynthetic() + || c.isAnonymousClass() + || c.isMemberClass() + || c.isInterface() + || c.isAnnotationPresent(Deprecated.class) + || c.isAnnotationPresent(IgnoreRandomChains.class) + || !(Tokenizer.class.isAssignableFrom(c) + || TokenFilter.class.isAssignableFrom(c) + || CharFilter.class.isAssignableFrom(c))) { + continue; + } + + for (final Constructor ctor : c.getConstructors()) { + // don't test synthetic, deprecated, or @IgnoreRandomChains annotated ctors, they likely + // have known bugs: + if (ctor.isSynthetic() + || ctor.isAnnotationPresent(Deprecated.class) + || ctor.isAnnotationPresent(IgnoreRandomChains.class)) { + continue; + } + // conditional filters are tested elsewhere + if (ConditionalTokenFilter.class.isAssignableFrom(c)) { + continue; + } + if (Tokenizer.class.isAssignableFrom(c)) { + assertTrue( + ctor.toGenericString() + " has unsupported parameter types", + allowedTokenizerArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); + tokenizers.add(castConstructor(Tokenizer.class, ctor)); + } else if (TokenFilter.class.isAssignableFrom(c)) { + assertTrue( + ctor.toGenericString() + " has unsupported parameter types", + allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); + tokenfilters.add(castConstructor(TokenFilter.class, ctor)); + } else if (CharFilter.class.isAssignableFrom(c)) { + assertTrue( + ctor.toGenericString() + " has unsupported parameter types", + allowedCharFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); + charfilters.add(castConstructor(CharFilter.class, ctor)); + } else { + fail("Cannot get here"); + } + } + } + + final Comparator> ctorComp = Comparator.comparing(Constructor::toGenericString); + Collections.sort(tokenizers, ctorComp); + Collections.sort(tokenfilters, ctorComp); + Collections.sort(charfilters, ctorComp); + if (VERBOSE) { + System.out.println("tokenizers = " + tokenizers); + System.out.println("tokenfilters = " + tokenfilters); + System.out.println("charfilters = " + charfilters); + } + + // TODO: Eclipse does not get that cast right, so make explicit: + 
final Function, Class> stemmerCast = + c -> c.asSubclass(SnowballStemmer.class); + snowballStemmers = + ModuleClassDiscovery.getClassesForPackage("org.tartarus.snowball.ext").stream() + .filter(c -> c.getName().endsWith("Stemmer")) + .map(stemmerCast) + .sorted(Comparator.comparing(Class::getName)) + .collect(Collectors.toList()); + if (VERBOSE) { + System.out.println("snowballStemmers = " + snowballStemmers); + } + } + + @AfterClass + public static void afterClass() { + tokenizers = null; + tokenfilters = null; + charfilters = null; + snowballStemmers = null; + } + + /** Creates a static/unmodifiable set from 2 collections as union. */ + private static Set union(Collection c1, Collection c2) { + return Stream.concat(c1.stream(), c2.stream()).collect(Collectors.toUnmodifiableSet()); + } + + /** + * Hack to work around the stupidness of Oracle's strict Java backwards compatibility. {@code + * Class#getConstructors()} should return unmodifiable {@code List>} not array! + */ + @SuppressWarnings("unchecked") + private static Constructor castConstructor(Class instanceClazz, Constructor ctor) { + return (Constructor) ctor; + } + + @SuppressWarnings("unchecked") + static T newRandomArg(Random random, Class paramType) { + final Function producer = argProducers.get(paramType); + assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer); + return (T) producer.apply(random); + } + + static Object[] newTokenizerArgs(Random random, Class[] paramTypes) { + Object[] args = new Object[paramTypes.length]; + for (int i = 0; i < args.length; i++) { + Class paramType = paramTypes[i]; + args[i] = newRandomArg(random, paramType); + } + return args; + } + + static Object[] newCharFilterArgs(Random random, Reader reader, Class[] paramTypes) { + Object[] args = new Object[paramTypes.length]; + for (int i = 0; i < args.length; i++) { + Class paramType = paramTypes[i]; + if (paramType == Reader.class) { + args[i] = reader; + } else { + args[i] = newRandomArg(random, paramType); + } + } + return args; + } + + static Object[] newFilterArgs(Random random, TokenStream stream, Class[] paramTypes) { + Object[] args = new Object[paramTypes.length]; + for (int i = 0; i < args.length; i++) { + Class paramType = paramTypes[i]; + if (paramType == TokenStream.class) { + args[i] = stream; + } else if (paramType == CommonGramsFilter.class) { + // TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly + args[i] = new CommonGramsFilter(stream, newRandomArg(random, CharArraySet.class)); + } else { + args[i] = newRandomArg(random, paramType); + } + } + return args; + } + + static class MockRandomAnalyzer extends Analyzer { + final long seed; + + MockRandomAnalyzer(long seed) { + this.seed = seed; + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Random random = new Random(seed); + TokenizerSpec tokenizerSpec = newTokenizer(random); + // System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString); + TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer); + // System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString); + return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + Random random = new Random(seed); + CharFilterSpec charfilterspec = newCharFilterChain(random, reader); + return charfilterspec.reader; + } + + @Override + public String toString() { + Random 
random = new Random(seed); + StringBuilder sb = new StringBuilder(); + CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader("")); + sb.append("\ncharfilters="); + sb.append(charFilterSpec.toString); + // intentional: initReader gets its own separate random + random = new Random(seed); + TokenizerSpec tokenizerSpec = newTokenizer(random); + sb.append("\n"); + sb.append("tokenizer="); + sb.append(tokenizerSpec.toString); + TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer); + sb.append("\n"); + sb.append("filters="); + sb.append(tokenFilterSpec.toString); + return sb.toString(); + } + + private T createComponent( + Constructor ctor, Object[] args, StringBuilder descr, boolean isConditional) { + try { + final T instance = ctor.newInstance(args); + /* + if (descr.length() > 0) { + descr.append(","); + } + */ + descr.append("\n "); + if (isConditional) { + descr.append("Conditional:"); + } + descr.append(ctor.getDeclaringClass().getName()); + String params = Arrays.deepToString(args); + params = params.substring(1, params.length() - 1); + descr.append("(").append(params).append(")"); + return instance; + } catch (InvocationTargetException ite) { + final Throwable cause = ite.getCause(); + if (cause instanceof IllegalArgumentException + || cause instanceof UnsupportedOperationException) { + // thats ok, ignore + if (VERBOSE) { + System.err.println("Ignoring IAE/UOE from ctor:"); + cause.printStackTrace(System.err); + } + } else { + Rethrow.rethrow(cause); + } + } catch (IllegalAccessException | InstantiationException iae) { + Rethrow.rethrow(iae); + } + return null; // no success + } + + private boolean broken(Constructor ctor, Object[] args) { + final Predicate pred = brokenConstructors.get(ctor); + return pred != null && pred.test(args); + } + + // create a new random tokenizer from classpath + private TokenizerSpec newTokenizer(Random random) { + TokenizerSpec spec = new TokenizerSpec(); + while (spec.tokenizer == null) { + final Constructor ctor = + tokenizers.get(random.nextInt(tokenizers.size())); + final StringBuilder descr = new StringBuilder(); + final Object[] args = newTokenizerArgs(random, ctor.getParameterTypes()); + if (broken(ctor, args)) { + continue; + } + spec.tokenizer = createComponent(ctor, args, descr, false); + if (spec.tokenizer != null) { + spec.toString = descr.toString(); + } + } + return spec; + } + + private CharFilterSpec newCharFilterChain(Random random, Reader reader) { + CharFilterSpec spec = new CharFilterSpec(); + spec.reader = reader; + StringBuilder descr = new StringBuilder(); + int numFilters = random.nextInt(3); + for (int i = 0; i < numFilters; i++) { + while (true) { + final Constructor ctor = + charfilters.get(random.nextInt(charfilters.size())); + final Object[] args = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes()); + if (broken(ctor, args)) { + continue; + } + reader = createComponent(ctor, args, descr, false); + if (reader != null) { + spec.reader = reader; + break; + } + } + } + spec.toString = descr.toString(); + return spec; + } + + private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer) { + TokenFilterSpec spec = new TokenFilterSpec(); + spec.stream = tokenizer; + StringBuilder descr = new StringBuilder(); + int numFilters = random.nextInt(5); + for (int i = 0; i < numFilters; i++) { + + // Insert ValidatingTF after each stage so we can + // catch problems right after the TF that "caused" + // them: + spec.stream = new ValidatingTokenFilter(spec.stream, 
"stage " + i); + + while (true) { + final Constructor ctor = + tokenfilters.get(random.nextInt(tokenfilters.size())); + if (random.nextBoolean() + && avoidConditionals.contains(ctor.getDeclaringClass()) == false) { + long seed = random.nextLong(); + spec.stream = + new ConditionalTokenFilter( + spec.stream, + in -> { + final Object[] args = newFilterArgs(random, in, ctor.getParameterTypes()); + if (broken(ctor, args)) { + return in; + } + TokenStream ts = createComponent(ctor, args, descr, true); + if (ts == null) { + return in; + } + return ts; + }) { + Random random = new Random(seed); + + @Override + public void reset() throws IOException { + super.reset(); + random = new Random(seed); + } + + @Override + protected boolean shouldFilter() throws IOException { + return random.nextBoolean(); + } + }; + break; + } else { + final Object[] args = newFilterArgs(random, spec.stream, ctor.getParameterTypes()); + if (broken(ctor, args)) { + continue; + } + final TokenFilter flt = createComponent(ctor, args, descr, false); + if (flt != null) { + spec.stream = flt; + break; + } + } + } + } + + // Insert ValidatingTF after each stage so we can + // catch problems right after the TF that "caused" + // them: + spec.stream = new ValidatingTokenFilter(spec.stream, "last stage"); + + spec.toString = descr.toString(); + return spec; + } + } + + static class TokenizerSpec { + Tokenizer tokenizer; + String toString; + } + + static class TokenFilterSpec { + TokenStream stream; + String toString; + } + + static class CharFilterSpec { + Reader reader; + String toString; + } + + public void testRandomChains() throws Throwable { + int numIterations = TEST_NIGHTLY ? atLeast(20) : 3; + Random random = random(); + for (int i = 0; i < numIterations; i++) { + try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) { + if (VERBOSE) { + System.out.println("Creating random analyzer:" + a); + } + try { + checkNormalize(a); + checkRandomData( + random, + a, + 500 * RANDOM_MULTIPLIER, + 20, + false, + false /* We already validate our own offsets... */); + } catch (Throwable e) { + System.err.println("Exception from random analyzer: " + a); + throw e; + } + } + } + } + + public void checkNormalize(Analyzer a) { + // normalization should not modify characters that may be used for wildcards + // or regular expressions + String s = "([0-9]+)?*"; + assertEquals(s, a.normalize("dummy", s).utf8ToString()); + } + + // we might regret this decision... + public void testRandomChainsWithLargeStrings() throws Throwable { + int numIterations = TEST_NIGHTLY ? atLeast(20) : 3; + Random random = random(); + for (int i = 0; i < numIterations; i++) { + try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) { + if (VERBOSE) { + System.out.println("Creating random analyzer:" + a); + } + try { + checkRandomData( + random, + a, + 50 * RANDOM_MULTIPLIER, + 80, + false, + false /* We already validate our own offsets... 
*/); + } catch (Throwable e) { + System.err.println("Exception from random analyzer: " + a); + throw e; + } + } + } + } +} diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/da_UTF8.xml b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/da_UTF8.xml new file mode 100644 index 00000000000..2c8d203be68 --- /dev/null +++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/da_UTF8.xml @@ -0,0 +1,1208 @@ + + + + + + + + + + +aA +bB +cC +dD +eE +fF +gG +hH +iI +jJ +kK +lL +mM +nN +oO +pP +qQ +rR +sS +tT +uU +vV +wW +xX +yY +zZ +æÆ +øØ +åÅ + + + +.ae3 +.an3k +.an1s +.be5la +.be1t +.bi4tr +.der3i +.diagno5 +.her3 +.hoved3 +.ne4t5 +.om1 +.ove4 +.po1 +.til3 +.yd5r +ab5le +3abst +a3c +ade5la +5adg +a1e +5afg +5a4f1l +af3r +af4ri +5afs +a4gef +a4gi +ag5in +ag5si +3agti +a4gy +a3h +ais5t +a3j +a5ka +a3ke +a5kr +aku5 +a3la +a1le +a1li +al3k +4alkv +a1lo +al5si +a3lu +a1ly +am4pa +3analy +an4k5r +a3nu +3anv +a5o +a5pe +a3pi +a5po +a1ra +ar5af +1arb +a1re +5arg +a1ri +a3ro +a3sa +a3sc +a1si +a3sk +a3so +3a3sp +a3ste +a3sti +a1ta1 +a1te +a1ti +a4t5in +a1to +ato5v +a5tr +a1tu +a5va +a1ve +a5z +1ba +ba4ti +4bd +1be +be1k +be3ro +be5ru +be1s4 +be1tr +1bi +bi5sk +b1j +4b1n +1bo +bo4gr +bo3ra +bo5re +1br4 +4bs +bs5k +b3so +b1st +b5t +3bu +bu4s5tr +b5w +1by +by5s +4c1c +1ce +ce5ro +3ch +4ch. +ci4o +ck3 +5cy +3da +4d3af +d5anta +da4s +d1b +d1d4 +1de +de5d +4de4lem +der5eri +de4rig +de5sk +d1f +d1g +d3h +1di +di1e +di5l +d3j +d1k +d1l +d1m +4d1n +3do +4dop +d5ov +d1p +4drett +5d4reve +3drif +3driv +d5ros +d5ru +ds5an +ds5in +d1ski +d4sm +d4su +dsu5l +ds5vi +d3ta +d1te +dt5o +d5tr +dt5u +1du +dub5 +d1v +3dy +e5ad +e3af +e5ag +e3ak +e1al +ea4la +e3an +e5ap +e3at +e3bl +ebs3 +e1ci +ed5ar +edde4 +eddel5 +e4do +ed5ra +ed3re +ed3rin +ed4str +e3e +3eff +e3fr +3eft +e3gu +e1h +e3in +ei5s +e3je +e4j5el +e1ka +e3ke +e3kl +4e1ko +e5kr +ek5sa +3eksem +3eksp +e3ku +e1kv +e5ky +e3lad +el3ak +el3ar +e1las +e3le +e4lek +3elem +e1li +5elim +e3lo +el5sa +e5lu +e3ly +e4mad +em4p5le +em1s +en5ak +e4nan +4enn +e4no +en3so +e5nu +e5ol +e3op +e1or +e3ov +epi3 +e1pr +e3ra +er3af +e4rag +e4rak +e1re +e4ref +er5ege +5erhv +e1ri +e4rib +er1k +ero5d +er5ov +er3s +er5tr +e3rum +er5un +e5ry +e1ta +e1te +etek4s +e1ti +e3tj +e1to +e3tr +e3tu +e1ty +e3um +e3un +3eur +e1va +e3ve +e4v3erf +e1vi +e5x +1fa +fa4ce +fags3 +f1b +f1d +1fe +fej4 +fejl1 +f1f +f1g +f1h +1fi +f1k +3fl +1fo +for1en +fo4ri +f1p +f1s4 +4ft +f3ta +f1te +f1ti +f5to +f5tvi +1fu +f1v +3fy +1ga +g3art +g1b +g1d +1ge +4g5enden +ger3in +ge3s +g3f +g1g +g1h +1gi +gi4b +gi3st +5gj +g3k +g1l +g1m +3go +4g5om +g5ov +g3p +1gr +gs1a +gsde4len +g4se +gsha4 +g5sla +gs3or +gs1p +g5s4tide +g4str +gs1v +g3ta +g1te +g1ti +g5to +g3tr +gt4s +g3ud +gun5 +g3v +1gy +g5yd +4ha. +heds3 +he5s +4het +hi4e +hi4n5 +hi3s +ho5ko +ho5ve +4h3t +hun4 +hund3 +hvo4 +i1a +i3b +i4ble +i1c +i3dr +ids5k +i1el +i1en +i3er +i3et. +if3r +i3gu +i3h +i5i +i5j +i1ka +i1ke +ik1l +i5ko +ik3re +ik5ri +iks5t +ik4tu +i3ku +ik3v +i3lag +il3eg +il5ej +il5el +i3li +i4l5id +il3k +i1lo +il5u +i3mu +ind3t +5inf +ings1 +in3s +in4sv +inter1 +i3nu +i3od +i3og +i5ok +i3ol +ion4 +ions1 +i5o5r +i3ot +i5pi +i3pli +i5pr +i3re +i3ri +ir5t +i3sc +i3si +i4sm +is3p +i1ster +i3sti +i5sua +i1ta +i1te +i1ti +i3to +i3tr +it5re. +i1tu +i3ty +i1u +i1va +i1ve +i1vi +j3ag +jde4rer +jds1 +jek4to +4j5en. 
+j5k +j3le +j3li +jlmeld5 +jlmel4di +j3r +jre5 +ju3s +5kap +k5au +5kav +k5b +kel5s +ke3sk +ke5st +ke4t5a +k3h +ki3e +ki3st +k1k +k5lak +k1le +3klu +k4ny +5kod +1kon +ko3ra +3kort +ko3v +1kra +5kry +ks3an +k1si +ks3k +ks1p +k3ste +k5stu +ks5v +k1t +k4tar +k4terh +kti4e +kt5re +kt5s +3kur +1kus +3kut +k4vo +k4vu +5lab +lad3r +5lagd +la4g3r +5lam +1lat +l1b +ldiagnos5 +l3dr +ld3st +1le. +5led +4lele +le4mo +3len +1ler +1les +4leu +l1f +lfin4 +lfind5 +l1go1 +l3h +li4ga +4l5ins +4l3int +li5o +l3j +l1ke +l1ko +l3ky +l1l +l5mu +lo4du +l3op +4l5or +3lov +4l3p +l4ps +l3r +4ls +lses1 +ls5in +l5sj +l1ta +l4taf +l1te +l4t5erf +l3ti +lt3o +l3tr +l3tu +lu5l +l3ve +l3vi +1ma +m1b +m3d +1me +4m5ej +m3f +m1g +m3h +1mi +mi3k +m5ing +mi4o +mi5sty +m3k +m1l +m1m +mmen5 +m1n +3mo +mo4da +4mop +4m5ov +m1pe +m3pi +m3pl +m1po +m3pr +m1r +mse5s +ms5in +m5sk +ms3p +m3ste +ms5v +m3ta +m3te +m3ti +m3tr +m1ud +1mul +mu1li +3my +3na +4nak +1nal +n1b +n1c +4nd +n3dr +nd5si +nd5sk +nd5sp +1ne +ne5a +ne4da +nemen4 +nement5e +neo4 +n3erk +n5erl +ne5sl +ne5st +n1f +n4go +4n1h +1ni +4nim +ni5o +ni3st +n1ke +n1ko +n3kr +n3ku +n5kv +4n1l +n1m +n1n +1no +n3ord +n5p +n3r +4ns +n3si +n1sku +ns3po +n1sta +n5sti +n1ta +nta4le +n1te +n1ti +ntiali4 +n3to +n1tr +nt4s5t +nt4su +n3tu +n3ty +4n1v +3ny +n3z +o3a +o4as +ob3li +o1c +o4din +od5ri +od5s +od5un +o1e +of5r +o4gek +o4gel +o4g5o +og5re +og5sk +o5h +o5in +oi6s5e +o1j +o3ka +o1ke +o3ku +o3la +o3le +o1li +o1lo +o3lu +o5ly +1omr +on3k +ook5 +o3or +o5ov +o3pi +op3l +op3r +op3s +3opta +4or. +or1an +3ordn +ord5s +o3re. +o3reg +o3rek +o3rer +o3re3s +o3ret +o3ri +3orient +or5im +o4r5in +or3k +or5o +or3sl +or3st +o3si +o3so +o3t +o1te +o5un +ov4s +3pa +pa5gh +p5anl +p3d +4pec +3pen +1per +pe1ra +pe5s +pe3u +p3f +4p5h +1pla +p4lan +4ple. +4pler +4ples +p3m +p3n +5pok +4po3re +3pot +4p5p4 +p4ro +1proc +p3sk +p5so +ps4p +p3st +p1t +1pu +pu5b +p5ule +p5v +5py3 +qu4 +4raf +ra5is +4rarb +r1b +r4d5ar +r3dr +rd4s3 +4reks +1rel +re5la +r5enss +5rese +re5spo +4ress +re3st +re5s4u +5rett +r1f +r1gu +r1h +ri1e +ri5la +4rimo +r4ing +ringse4 +ringso4r +4rinp +4rint +r3ka +r1ke +r1ki +rk3so +r3ku +r1l +rmo4 +r5mu +r1n +ro1b +ro3p +r3or +r3p +r1r +rre5s +rro4n5 +r1sa +r1si +r5skr +r4sk5v +rs4n +r3sp +r5stu +r5su +r3sv +r5tal +r1te +r4teli +r1ti +r3to +r4t5or +rt5rat +rt3re +r5tri +r5tro +rt3s +r5ty +r3ud +run4da +5rut +r3va +r1ve +r3vi +ry4s +s3af +1sam +sa4ma +s3ap +s1ar +1sat +4s1b +s1d +sdy4 +1se +s4ed +5s4er +se4se +s1f +4s1g4 +4s3h +si4bl +1sig +s5int +5sis +5sit +5siu +s5ju +4sk. +1skab +1ske +s3kl +sk5s4 +5sky +s1le +s1li +slo3 +5slu +s5ly +s1m +s4my +4snin +s4nit +so5k +5sol +5som. +3somm +s5oms +5somt +3son +4s1op +sp4 +3spec +4sper +3s4pi +s1pl +3sprog. +s5r4 +s1s4 +4st. +5s4tam +1stan +st5as +3stat +1stav +1ste. +1sted +3stel +5stemo +1sten +5step +3ster. +3stes +5stet +5stj +3sto +st5om +1str +s1ud +3sul +s3un +3sur +s3ve +3s4y +1sy1s +5ta. 
+1tag +tands3 +4tanv +4tb +tede4l +teds5 +3teg +5tekn +teo1 +5term +te5ro +4t1f +6t3g +t1h +tialis5t +3tid +ti4en +ti3st +4t3k +4t1l +tli4s5 +t1m +t1n +to5ra +to1re +to1ri +tor4m +4t3p +t4ra +4tres +tro5v +1try +4ts +t3si +ts4pa +ts5pr +t3st +ts5ul +4t1t +t5uds +5tur +t5ve +1typ +u1a +5udl +ud5r +ud3s +3udv +u1e +ue4t5 +uge4ri +ugs3 +u5gu +u3i +u5kl +uk4ta +uk4tr +u1la +u1le +u5ly +u5pe +up5l +u5q +u3ra +u3re +u4r3eg +u1rer +u3ro +us5a +u3si +u5ska +u5so +us5v +u1te +u1ti +u1to +ut5r +ut5s4 +5u5v +va5d +3varm +1ved +ve4l5e +ve4reg +ve3s +5vet +v5h +vi4l3in +1vis +v5j +v5k +vl4 +v3le +v5li +vls1 +1vo +4v5om +v5p +v5re +v3st +v5su +v5t +3vu +y3a +y5dr +y3e +y3ke +y5ki +yk3li +y3ko +yk4s5 +y3kv +y5li +y5lo +y5mu +yns5 +y5o +y1pe +y3pi +y3re +yr3ek +y3ri +y3si +y3ti +y5t3r +y5ve +zi5o + +.så3 +.ær5i +.øv3r +a3tø +a5væ +brød3 +5bæ +5drøv +dstå4 +3dæ +3dø +e3læ +e3lø +e3rø +er5øn +e5tæ +e5tø +e1væ +e3æ +e5å +3fæ +3fø +fø4r5en +giø4 +g4sø +g5så +3gæ +3gø1 +3gå +i5tæ +i3ø +3kø +3kå +lingeniø4 +l3væ +5løs +m5tå +1mæ +3mø +3må +n3kæ +n5tæ +3næ +4n5æb +5nø +o5læ +or3ø +o5å +5præ +5pæd +på3 +r5kæ +r5tæ +r5tø +r3væ +r5æl +4røn +5rør +3råd +r5år +s4kå +3slå +s4næ +5stø +1stå +1sæ +4s5æn +1sø +s5øk +så4r5 +ti4ø +3træk. +t4sø +t5så +t3væ +u3læ +3værd +1værk +5vå +y5væ +æb3l +æ3c +æ3e +æg5a +æ4gek +æ4g5r +ægs5 +æ5i +æ5kv +ælle4 +æn1dr +æ5o +æ1re +ær4g5r +æ3ri +ær4ma +ær4mo +ær5s +æ5si +æ3so +æ3ste +æ3ve +øde5 +ø3e +ø1je +ø3ke +ø3le +øms5 +øn3st +øn4t3 +ø1re +ø3ri +ørne3 +ør5o +ø1ve +å1d +å1e +å5h +å3l +å3re +års5t +å5sk +å3t + + diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.aff b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.aff new file mode 100644 index 00000000000..aaf4a6cdf22 --- /dev/null +++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.aff @@ -0,0 +1,20 @@ +SET UTF-8 +TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + +SFX A Y 3 +SFX A 0 e n +SFX A 0 e t +SFX A 0 e h + +SFX C Y 2 +SFX C 0 d/C c +SFX C 0 c b + +SFX D Y 1 +SFX D 0 s o + +SFX E Y 1 +SFX E 0 d o + +PFX B Y 1 +PFX B 0 s o diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.dic b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.dic new file mode 100644 index 00000000000..2809611b876 --- /dev/null +++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/simple.dic @@ -0,0 +1,11 @@ +9 +ab/C +apach/A +foo/D +foo/E +lucen/A +lucene +mahout/A +moo/E +olr/B +db \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/boost/DelimitedBoostTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/boost/DelimitedBoostTokenFilter.java index b70768e65d3..9e693ca8710 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/boost/DelimitedBoostTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/boost/DelimitedBoostTokenFilter.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.search.BoostAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * Characters before the delimiter are the "token", those after are the boost. @@ -30,6 +31,8 @@ import org.apache.lucene.search.BoostAttribute; * *

Note make sure your Tokenizer doesn't split on the delimiter, or this won't work */ +@IgnoreRandomChains( + reason = "requires a special encoded token value, so it may fail with random data") public final class DelimitedBoostTokenFilter extends TokenFilter { private final char delimiter; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java index 2f9337d242f..a384dba2b85 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.IgnoreRandomChains; /** * Forms bigrams of CJK terms that are generated from StandardTokenizer or ICUTokenizer. @@ -47,6 +48,7 @@ import org.apache.lucene.util.ArrayUtil; * *
In all cases, all non-CJK input is passed thru unmodified. */ +@IgnoreRandomChains(reason = "LUCENE-8092: doesn't handle graph inputs") public final class CJKBigramFilter extends TokenFilter { // configuration /** bigram flag for Han Ideographs */ diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java index d1a81c17631..0979ade78c8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /* * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors @@ -43,10 +44,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute; * "the-quick" has a term.type() of "gram" * */ - -/* - * Constructors and makeCommonSet based on similar code in StopFilter - */ +@IgnoreRandomChains(reason = "LUCENE-4983") public final class CommonGramsFilter extends TokenFilter { public static final String GRAM_TYPE = "gram"; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java index 80a638112fd..7a5ba1322ec 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * Wrap a CommonGramsFilter optimizing phrase queries by only returning single words when they are @@ -42,6 +43,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute; * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798 */ +@IgnoreRandomChains(reason = "TODO: doesn't handle graph inputs") public final class CommonGramsQueryFilter extends TokenFilter { private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java index 144fe069c13..fafdec7c72c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.IgnoreRandomChains; /** * Normalizes 
token text to lower case. @@ -27,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream; * @see org.apache.lucene.analysis.LowerCaseFilter * @see LowerCaseFilterFactory */ +@IgnoreRandomChains(reason = "clones of core's filters") public final class LowerCaseFilter extends org.apache.lucene.analysis.LowerCaseFilter { /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java index 22b756138d6..08b170f3f51 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.IgnoreRandomChains; /** * Removes stop words from a token stream. @@ -28,6 +29,7 @@ import org.apache.lucene.analysis.TokenStream; * @see org.apache.lucene.analysis.StopFilter * @see StopFilterFactory */ +@IgnoreRandomChains(reason = "clones of core's filters") public final class StopFilter extends org.apache.lucene.analysis.StopFilter { /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DelimitedTermFrequencyTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DelimitedTermFrequencyTokenFilter.java index 417602c0297..a42c988ab8a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DelimitedTermFrequencyTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DelimitedTermFrequencyTokenFilter.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.IgnoreRandomChains; /** * Characters before the delimiter are the "token", the textual integer after is the term frequency. @@ -36,6 +37,8 @@ import org.apache.lucene.util.ArrayUtil; * *
Note make sure your Tokenizer doesn't split on the delimiter, or this won't work */ +@IgnoreRandomChains( + reason = "requires a special encoded token value, so it may fail with random data") public final class DelimitedTermFrequencyTokenFilter extends TokenFilter { public static final char DEFAULT_DELIMITER = '|'; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java index 47fef0937e8..68216359eb0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * When the plain text is extracted from documents, we will often have many words hyphenated and @@ -50,6 +51,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; * </fieldtype> * */ +@IgnoreRandomChains( + reason = "TODO: doesn't handle graph inputs (or even look at positionIncrement)") public final class HyphenatedWordsFilter extends TokenFilter { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java index 4627f9ea4e3..80863ee2839 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.IgnoreRandomChains; /** * This TokenFilter limits the number of tokens while indexing. 
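// --- Illustrative aside (not part of the patch): the annotation above sits on
// the two-arg constructor only, because that form defaults consumeAllTokens to
// false. A sketch of the three-arg form that stays eligible for random chains
// (the limit value is illustrative):
TokenStream stream = new WhitespaceTokenizer();
// emit at most 10 tokens, but keep draining the wrapped stream so that
// end() still observes the true final offset
stream = new LimitTokenCountFilter(stream, 10, true);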
It is a replacement for the maximum @@ -45,6 +46,7 @@ public final class LimitTokenCountFilter extends TokenFilter { * * @see #LimitTokenCountFilter(TokenStream,int,boolean) */ + @IgnoreRandomChains(reason = "all tokens must be consumed") public LimitTokenCountFilter(TokenStream in, int maxTokenCount) { this(in, maxTokenCount, false); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenOffsetFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenOffsetFilter.java index 757fa96c9a7..0a2db1df243 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenOffsetFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenOffsetFilter.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * Lets all tokens pass through until it sees one with a start offset <= a configured limit, @@ -46,6 +47,7 @@ public final class LimitTokenOffsetFilter extends TokenFilter { * * @param maxStartOffset the maximum start offset allowed */ + @IgnoreRandomChains(reason = "all tokens must be consumed") public LimitTokenOffsetFilter(TokenStream input, int maxStartOffset) { this(input, maxStartOffset, false); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java index 6230ee7f7cb..edbee58bbb8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * This TokenFilter limits its emitted tokens to those with positions that are not greater than the @@ -50,6 +51,7 @@ public final class LimitTokenPositionFilter extends TokenFilter { * @param maxTokenPosition max position of tokens to produce (1st token always has position 1) * @see #LimitTokenPositionFilter(TokenStream,int,boolean) */ + @IgnoreRandomChains(reason = "all tokens must be consumed") public LimitTokenPositionFilter(TokenStream in, int maxTokenPosition) { this(in, maxTokenPosition, false); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java index 2971704297b..8b871d3f2e9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.java @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; import org.apache.lucene.util.InPlaceMergeSorter; import 
org.apache.lucene.util.RamUsageEstimator; @@ -83,6 +84,7 @@ import org.apache.lucene.util.RamUsageEstimator; * StandardTokenizer} immediately removes many intra-word delimiters, it is recommended that this * filter be used after a tokenizer that does not do this (such as {@link WhitespaceTokenizer}). */ +@IgnoreRandomChains(reason = "Cannot correct offsets when a char filter had changed them") public final class WordDelimiterGraphFilter extends TokenFilter { /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java index 57fe65bbb98..e7dfa320b6b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.IgnoreRandomChains; /** * Tokenizer for path-like hierarchies. @@ -40,6 +41,7 @@ import org.apache.lucene.util.AttributeFactory; * /something/something/else * */ +@IgnoreRandomChains(reason = "broken offsets") public class PathHierarchyTokenizer extends Tokenizer { public PathHierarchyTokenizer() { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java index d1cdb3a9386..7b1f60f51c4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.IgnoreRandomChains; /** * Tokenizer for domain-like hierarchies. @@ -43,6 +44,7 @@ import org.apache.lucene.util.AttributeFactory; * uk * */ +@IgnoreRandomChains(reason = "broken offsets") public class ReversePathHierarchyTokenizer extends Tokenizer { public ReversePathHierarchyTokenizer() { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java index 76ef11be8ed..bdb8799dbcf 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; /** * Extension of StandardTokenizer that is aware of Wikipedia syntax. 
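// --- Illustrative aside (not part of the patch): the "broken offsets" reason
// on the two path tokenizers refers to the prefix tokens they emit, all
// anchored at start offset 0. A sketch with an illustrative input:
Tokenizer path = new PathHierarchyTokenizer();
path.setReader(new StringReader("/usr/local/bin"));
// emits "/usr" (0,4), "/usr/local" (0,10), "/usr/local/bin" (0,14) --
// overlapping offsets like these trip the offset checks in random chains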
It is based off of the @@ -34,6 +35,7 @@ import org.apache.lucene.util.AttributeSource; * * @lucene.experimental */ +@IgnoreRandomChains(reason = "TODO: it seems to mess up offsets!?") public final class WikipediaTokenizer extends Tokenizer { public static final String INTERNAL_LINK = "il"; public static final String EXTERNAL_LINK = "el"; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java index 28777e8fdb6..0ee336aeb25 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java @@ -69,7 +69,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { protected Reader initReader(String fieldName, Reader reader) { reader = new MockCharFilter(reader, 0); reader = new MappingCharFilter(map, reader); - reader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader); + reader = new CheckThatYouDidntReadAnythingReaderWrapper(reader); return reader; } }; @@ -137,7 +137,7 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { }; public void testWrapping() throws Exception { - CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream); + CharFilter cs = new CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream); Exception expected = expectThrows( Exception.class, @@ -221,6 +221,69 @@ public class TestBugInSomething extends BaseTokenStreamTestCase { // todo: test framework? + static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter { + boolean readSomething; + + CheckThatYouDidntReadAnythingReaderWrapper(Reader in) { + super(in); + } + + @Override + public int correct(int currentOff) { + return currentOff; // we don't change any offsets + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + readSomething = true; + return input.read(cbuf, off, len); + } + + @Override + public int read() throws IOException { + readSomething = true; + return input.read(); + } + + @Override + public int read(CharBuffer target) throws IOException { + readSomething = true; + return input.read(target); + } + + @Override + public int read(char[] cbuf) throws IOException { + readSomething = true; + return input.read(cbuf); + } + + @Override + public long skip(long n) throws IOException { + readSomething = true; + return input.skip(n); + } + + @Override + public void mark(int readAheadLimit) throws IOException { + input.mark(readAheadLimit); + } + + @Override + public boolean markSupported() { + return input.markSupported(); + } + + @Override + public boolean ready() throws IOException { + return input.ready(); + } + + @Override + public void reset() throws IOException { + input.reset(); + } + } + static final class SopTokenFilter extends TokenFilter { SopTokenFilter(TokenStream input) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java index 95b8bddb114..5ae7b111024 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java @@ -33,10 +33,10 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.TokenizerFactory; import 
org.apache.lucene.analysis.boost.DelimitedBoostTokenFilterFactory; import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.tests.util.LuceneTestCase.Nightly; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.ResourceLoaderAware; import org.apache.lucene.util.Version; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java deleted file mode 100644 index 98256b3b6f2..00000000000 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.analysis.core; - -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.io.StringReader; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Modifier; -import java.net.URI; -import java.net.URL; -import java.nio.CharBuffer; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.IdentityHashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.CharArrayMap; -import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.CharFilter; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.boost.DelimitedBoostTokenFilter; -import org.apache.lucene.analysis.charfilter.NormalizeCharMap; -import org.apache.lucene.analysis.cjk.CJKBigramFilter; -import org.apache.lucene.analysis.commongrams.CommonGramsFilter; -import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter; -import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter; -import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter; -import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; -import org.apache.lucene.analysis.hunspell.Dictionary; -import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter; -import org.apache.lucene.analysis.minhash.MinHashFilter; -import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter; -import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter; -import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter; -import org.apache.lucene.analysis.miscellaneous.FingerprintFilter; -import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter; -import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter; -import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter; -import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter; -import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; -import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap; -import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; -import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter; -import org.apache.lucene.analysis.path.PathHierarchyTokenizer; -import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer; -import org.apache.lucene.analysis.pattern.PatternTypingFilter; -import org.apache.lucene.analysis.payloads.IdentityEncoder; -import org.apache.lucene.analysis.payloads.PayloadEncoder; -import org.apache.lucene.analysis.shingle.FixedShingleFilter; -import org.apache.lucene.analysis.shingle.ShingleFilter; -import org.apache.lucene.analysis.snowball.TestSnowball; -import 
org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.analysis.synonym.SynonymMap; -import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer; -import org.apache.lucene.store.ByteBuffersDirectory; -import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.tests.analysis.CrankyTokenFilter; -import org.apache.lucene.tests.analysis.MockTokenFilter; -import org.apache.lucene.tests.analysis.MockTokenizer; -import org.apache.lucene.tests.analysis.ValidatingTokenFilter; -import org.apache.lucene.tests.util.Rethrow; -import org.apache.lucene.tests.util.TestUtil; -import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; -import org.apache.lucene.util.AttributeFactory; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.Version; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.CharacterRunAutomaton; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.automaton.RegExp; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.tartarus.snowball.SnowballStemmer; -import org.xml.sax.InputSource; - -/** tests random analysis chains */ -public class TestRandomChains extends BaseTokenStreamTestCase { - - static List> tokenizers; - static List> tokenfilters; - static List> charfilters; - - private static final Predicate ALWAYS = (objects -> true); - - private static final Set> avoidConditionals = new HashSet<>(); - - static { - // These filters needs to consume the whole tokenstream, so conditionals don't make sense here - avoidConditionals.add(FingerprintFilter.class); - avoidConditionals.add(MinHashFilter.class); - avoidConditionals.add(ConcatenateGraphFilter.class); - // ShingleFilter doesn't handle input graphs correctly, so wrapping it in a condition can - // expose inconsistent offsets - // https://issues.apache.org/jira/browse/LUCENE-4170 - avoidConditionals.add(ShingleFilter.class); - avoidConditionals.add(FixedShingleFilter.class); - // FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition - // can break position lengths - avoidConditionals.add(FlattenGraphFilter.class); - // LimitToken*Filters don't set end offsets correctly - avoidConditionals.add(LimitTokenOffsetFilter.class); - avoidConditionals.add(LimitTokenCountFilter.class); - avoidConditionals.add(LimitTokenPositionFilter.class); - } - - private static final Map, Predicate> brokenConstructors = - new HashMap<>(); - - static { - initBrokenConstructors(); - } - - @SuppressWarnings("deprecation") - private static void initBrokenConstructors() { - try { - brokenConstructors.put( - LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class), ALWAYS); - brokenConstructors.put( - LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), - args -> { - assert args.length == 3; - return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - }); - brokenConstructors.put( - LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class), ALWAYS); - brokenConstructors.put( - LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), - args -> { - assert args.length == 3; - return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - }); - brokenConstructors.put( - LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class), ALWAYS); - brokenConstructors.put( - 
LimitTokenPositionFilter.class.getConstructor( - TokenStream.class, int.class, boolean.class), - args -> { - assert args.length == 3; - return !((Boolean) args[2]); // args are broken if consumeAllTokens is false - }); - for (Class c : - Arrays.>asList( - // doesn't actual reset itself! TODO this statement is probably obsolete as of - // LUCENE-6121 ? - CachingTokenFilter.class, - // LUCENE-8092: doesn't handle graph inputs - CJKBigramFilter.class, - // TODO: LUCENE-4983 - CommonGramsFilter.class, - // TODO: doesn't handle graph inputs - CommonGramsQueryFilter.class, - // Not broken, simulates brokenness: - CrankyTokenFilter.class, - // TODO: doesn't handle graph inputs (or even look at positionIncrement) - HyphenatedWordsFilter.class, - // broken offsets - PathHierarchyTokenizer.class, - // broken offsets - ReversePathHierarchyTokenizer.class, - // Not broken: we forcefully add this, so we shouldn't - // also randomly pick it: - ValidatingTokenFilter.class, - // TODO: it seems to mess up offsets!? - WikipediaTokenizer.class, - // TODO: needs to be a tokenizer, doesnt handle graph inputs properly (a shingle or - // similar following will then cause pain) - WordDelimiterFilter.class, - // Cannot correct offsets when a char filter had changed them: - WordDelimiterGraphFilter.class, - // requires a special encoded token value, so it may fail with random data: - DelimitedTermFrequencyTokenFilter.class, - // requires a special encoded token value, so it may fail with random data: - DelimitedBoostTokenFilter.class, - // clones of core's filters: - org.apache.lucene.analysis.core.StopFilter.class, - org.apache.lucene.analysis.core.LowerCaseFilter.class)) { - for (Constructor ctor : c.getConstructors()) { - brokenConstructors.put(ctor, ALWAYS); - } - } - } catch (Exception e) { - throw new Error(e); - } - } - - @BeforeClass - public static void beforeClass() throws Exception { - List> analysisClasses = getClassesForPackage("org.apache.lucene.analysis"); - tokenizers = new ArrayList<>(); - tokenfilters = new ArrayList<>(); - charfilters = new ArrayList<>(); - for (final Class c : analysisClasses) { - final int modifiers = c.getModifiers(); - if ( - // don't waste time with abstract classes or deprecated known-buggy ones - Modifier.isAbstract(modifiers) - || !Modifier.isPublic(modifiers) - || c.isSynthetic() - || c.isAnonymousClass() - || c.isMemberClass() - || c.isInterface() - || c.isAnnotationPresent(Deprecated.class) - || !(Tokenizer.class.isAssignableFrom(c) - || TokenFilter.class.isAssignableFrom(c) - || CharFilter.class.isAssignableFrom(c))) { - continue; - } - - for (final Constructor ctor : c.getConstructors()) { - // don't test synthetic or deprecated ctors, they likely have known bugs: - if (ctor.isSynthetic() - || ctor.isAnnotationPresent(Deprecated.class) - || brokenConstructors.get(ctor) == ALWAYS) { - continue; - } - // conditional filters are tested elsewhere - if (ConditionalTokenFilter.class.isAssignableFrom(c)) { - continue; - } - if (Tokenizer.class.isAssignableFrom(c)) { - assertTrue( - ctor.toGenericString() + " has unsupported parameter types", - allowedTokenizerArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); - tokenizers.add(castConstructor(Tokenizer.class, ctor)); - } else if (TokenFilter.class.isAssignableFrom(c)) { - assertTrue( - ctor.toGenericString() + " has unsupported parameter types", - allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); - tokenfilters.add(castConstructor(TokenFilter.class, ctor)); - } else if 
(CharFilter.class.isAssignableFrom(c)) { - assertTrue( - ctor.toGenericString() + " has unsupported parameter types", - allowedCharFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes()))); - charfilters.add(castConstructor(CharFilter.class, ctor)); - } else { - fail("Cannot get here"); - } - } - } - - final Comparator> ctorComp = - (arg0, arg1) -> arg0.toGenericString().compareTo(arg1.toGenericString()); - Collections.sort(tokenizers, ctorComp); - Collections.sort(tokenfilters, ctorComp); - Collections.sort(charfilters, ctorComp); - if (VERBOSE) { - System.out.println("tokenizers = " + tokenizers); - System.out.println("tokenfilters = " + tokenfilters); - System.out.println("charfilters = " + charfilters); - } - } - - @AfterClass - public static void afterClass() { - tokenizers = null; - tokenfilters = null; - charfilters = null; - } - - /** - * Hack to work around the stupidness of Oracle's strict Java backwards compatibility. {@code - * Class#getConstructors()} should return unmodifiable {@code List>} not array! - */ - @SuppressWarnings("unchecked") - private static Constructor castConstructor(Class instanceClazz, Constructor ctor) { - return (Constructor) ctor; - } - - public static List> getClassesForPackage(String pckgname) throws Exception { - final List> classes = new ArrayList<>(); - collectClassesForPackage(pckgname, classes); - assertFalse( - "No classes found in package '" - + pckgname - + "'; maybe your test classes are packaged as JAR file?", - classes.isEmpty()); - return classes; - } - - private static void collectClassesForPackage(String pckgname, List> classes) - throws Exception { - final ClassLoader cld = TestRandomChains.class.getClassLoader(); - final String path = pckgname.replace('.', '/'); - final Enumeration resources = cld.getResources(path); - while (resources.hasMoreElements()) { - final URI uri = resources.nextElement().toURI(); - if (!"file".equalsIgnoreCase(uri.getScheme())) continue; - final Path directory = Paths.get(uri); - if (Files.exists(directory)) { - try (DirectoryStream stream = Files.newDirectoryStream(directory)) { - for (Path file : stream) { - if (Files.isDirectory(file)) { - // recurse - String subPackage = pckgname + "." + file.getFileName().toString(); - collectClassesForPackage(subPackage, classes); - } - String fname = file.getFileName().toString(); - if (fname.endsWith(".class")) { - String clazzName = fname.substring(0, fname.length() - 6); - // exclude Test classes that happen to be in these packages. - // class.ForName'ing some of them can cause trouble. - if (!clazzName.endsWith("Test") && !clazzName.startsWith("Test")) { - // Don't run static initializers, as we won't use most of them. - // Java will do that automatically once accessed/instantiated. - classes.add(Class.forName(pckgname + '.' + clazzName, false, cld)); - } - } - } - } - } - } - } - - private static final Map, Function> argProducers = - new IdentityHashMap, Function>() { - { - put( - int.class, - random -> { - // TODO: could cause huge ram usage to use full int range for some filters - // (e.g. allocate enormous arrays) - // return Integer.valueOf(random.nextInt()); - return Integer.valueOf(TestUtil.nextInt(random, -50, 50)); - }); - put( - char.class, - random -> { - // TODO: fix any filters that care to throw IAE instead. - // also add a unicode validating filter to validate termAtt? 
- // return Character.valueOf((char)random.nextInt(65536)); - while (true) { - char c = (char) random.nextInt(65536); - if (c < '\uD800' || c > '\uDFFF') { - return Character.valueOf(c); - } - } - }); - put(float.class, Random::nextFloat); - put(boolean.class, Random::nextBoolean); - put(byte.class, random -> (byte) random.nextInt(256)); - put( - byte[].class, - random -> { - byte[] bytes = new byte[random.nextInt(256)]; - random.nextBytes(bytes); - return bytes; - }); - put(Random.class, random -> new Random(random.nextLong())); - put(Version.class, random -> Version.LATEST); - put(AttributeFactory.class, BaseTokenStreamTestCase::newAttributeFactory); - put( - Set.class, - random -> { - // TypeTokenFilter - Set set = new HashSet<>(); - int num = random.nextInt(5); - for (int i = 0; i < num; i++) { - set.add( - StandardTokenizer.TOKEN_TYPES[ - random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]); - } - return set; - }); - put( - Collection.class, - random -> { - // CapitalizationFilter - Collection col = new ArrayList<>(); - int num = random.nextInt(5); - for (int i = 0; i < num; i++) { - col.add(TestUtil.randomSimpleString(random).toCharArray()); - } - return col; - }); - put( - CharArraySet.class, - random -> { - int num = random.nextInt(10); - CharArraySet set = new CharArraySet(num, random.nextBoolean()); - for (int i = 0; i < num; i++) { - // TODO: make nastier - set.add(TestUtil.randomSimpleString(random)); - } - return set; - }); - // TODO: don't want to make the exponentially slow ones Dawid documents - // in TestPatternReplaceFilter, so dont use truly random patterns (for now) - put(Pattern.class, random -> Pattern.compile("a")); - put( - Pattern[].class, - random -> new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")}); - put( - PayloadEncoder.class, - random -> - new IdentityEncoder()); // the other encoders will throw exceptions if tokens - // arent numbers? - put( - Dictionary.class, - random -> { - // TODO: make nastier - InputStream affixStream = - TestHunspellStemFilter.class.getResourceAsStream("simple.aff"); - InputStream dictStream = - TestHunspellStemFilter.class.getResourceAsStream("simple.dic"); - try { - return new Dictionary( - new ByteBuffersDirectory(), "dictionary", affixStream, dictStream); - } catch (Exception ex) { - Rethrow.rethrow(ex); - return null; // unreachable code - } - }); - put( - HyphenationTree.class, - random -> { - // TODO: make nastier - try { - InputSource is = - new InputSource( - TestCompoundWordTokenFilter.class - .getResource("da_UTF8.xml") - .toExternalForm()); - HyphenationTree hyphenator = - HyphenationCompoundWordTokenFilter.getHyphenationTree(is); - return hyphenator; - } catch (Exception ex) { - Rethrow.rethrow(ex); - return null; // unreachable code - } - }); - put( - SnowballStemmer.class, - random -> { - try { - String lang = - TestSnowball.SNOWBALL_LANGS.get( - random.nextInt(TestSnowball.SNOWBALL_LANGS.size())); - Class clazz = - Class.forName("org.tartarus.snowball.ext." 
+ lang + "Stemmer") - .asSubclass(SnowballStemmer.class); - return clazz.getConstructor().newInstance(); - } catch (Exception ex) { - Rethrow.rethrow(ex); - return null; // unreachable code - } - }); - put( - String.class, - random -> { - // TODO: make nastier - if (random.nextBoolean()) { - // a token type - return StandardTokenizer.TOKEN_TYPES[ - random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]; - } else { - return TestUtil.randomSimpleString(random); - } - }); - put( - NormalizeCharMap.class, - random -> { - NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); - // we can't add duplicate keys, or NormalizeCharMap gets angry - Set keys = new HashSet<>(); - int num = random.nextInt(5); - // System.out.println("NormalizeCharMap="); - for (int i = 0; i < num; i++) { - String key = TestUtil.randomSimpleString(random); - if (!keys.contains(key) && key.length() > 0) { - String value = TestUtil.randomSimpleString(random); - builder.add(key, value); - keys.add(key); - // System.out.println("mapping: '" + key + "' => '" + value + "'"); - } - } - return builder.build(); - }); - put( - CharacterRunAutomaton.class, - random -> { - // TODO: could probably use a purely random automaton - switch (random.nextInt(5)) { - case 0: - return MockTokenizer.KEYWORD; - case 1: - return MockTokenizer.SIMPLE; - case 2: - return MockTokenizer.WHITESPACE; - case 3: - return MockTokenFilter.EMPTY_STOPSET; - default: - return MockTokenFilter.ENGLISH_STOPSET; - } - }); - put( - CharArrayMap.class, - random -> { - int num = random.nextInt(10); - CharArrayMap map = new CharArrayMap<>(num, random.nextBoolean()); - for (int i = 0; i < num; i++) { - // TODO: make nastier - map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); - } - return map; - }); - put( - StemmerOverrideMap.class, - random -> { - int num = random.nextInt(10); - StemmerOverrideFilter.Builder builder = - new StemmerOverrideFilter.Builder(random.nextBoolean()); - for (int i = 0; i < num; i++) { - String input = ""; - do { - input = TestUtil.randomRealisticUnicodeString(random); - } while (input.isEmpty()); - String out = ""; - TestUtil.randomSimpleString(random); - do { - out = TestUtil.randomRealisticUnicodeString(random); - } while (out.isEmpty()); - builder.add(input, out); - } - try { - return builder.build(); - } catch (Exception ex) { - Rethrow.rethrow(ex); - return null; // unreachable code - } - }); - put( - SynonymMap.class, - new Function() { - @Override - public Object apply(Random random) { - SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean()); - final int numEntries = atLeast(10); - for (int j = 0; j < numEntries; j++) { - addSyn( - b, - randomNonEmptyString(random), - randomNonEmptyString(random), - random.nextBoolean()); - } - try { - return b.build(); - } catch (Exception ex) { - Rethrow.rethrow(ex); - return null; // unreachable code - } - } - - private void addSyn( - SynonymMap.Builder b, String input, String output, boolean keepOrig) { - b.add( - new CharsRef(input.replaceAll(" +", "\u0000")), - new CharsRef(output.replaceAll(" +", "\u0000")), - keepOrig); - } - - private String randomNonEmptyString(Random random) { - while (true) { - final String s = TestUtil.randomUnicodeString(random).trim(); - if (s.length() != 0 && s.indexOf('\u0000') == -1) { - return s; - } - } - } - }); - put( - DateFormat.class, - random -> { - if (random.nextBoolean()) return null; - return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random)); - }); - put( - Automaton.class, - 
random -> { - return Operations.determinize( - new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(), - Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); - }); - put( - PatternTypingFilter.PatternTypingRule[].class, - random -> { - int numRules = TestUtil.nextInt(random, 1, 3); - PatternTypingFilter.PatternTypingRule[] patternTypingRules = - new PatternTypingFilter.PatternTypingRule[numRules]; - for (int i = 0; i < patternTypingRules.length; i++) { - String s = TestUtil.randomSimpleString(random, 1, 2); - // random regex with one group - String regex = s + "(.*)"; - // pattern rule with a template that accepts one group. - patternTypingRules[i] = - new PatternTypingFilter.PatternTypingRule( - Pattern.compile(regex), TestUtil.nextInt(random, 1, 8), s + "_$1"); - } - return patternTypingRules; - }); - } - }; - - static final Set> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs; - - static { - allowedTokenizerArgs = Collections.newSetFromMap(new IdentityHashMap, Boolean>()); - allowedTokenizerArgs.addAll(argProducers.keySet()); - allowedTokenizerArgs.add(Reader.class); - allowedTokenizerArgs.add(AttributeFactory.class); - allowedTokenizerArgs.add(AttributeSource.class); - allowedTokenizerArgs.add(Automaton.class); - - allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap, Boolean>()); - allowedTokenFilterArgs.addAll(argProducers.keySet()); - allowedTokenFilterArgs.add(TokenStream.class); - // TODO: fix this one, thats broken: - allowedTokenFilterArgs.add(CommonGramsFilter.class); - - allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap, Boolean>()); - allowedCharFilterArgs.addAll(argProducers.keySet()); - allowedCharFilterArgs.add(Reader.class); - } - - @SuppressWarnings("unchecked") - static T newRandomArg(Random random, Class paramType) { - final Function producer = argProducers.get(paramType); - assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer); - return (T) producer.apply(random); - } - - static Object[] newTokenizerArgs(Random random, Class[] paramTypes) { - Object[] args = new Object[paramTypes.length]; - for (int i = 0; i < args.length; i++) { - Class paramType = paramTypes[i]; - if (paramType == AttributeSource.class) { - // TODO: args[i] = new AttributeSource(); - // this is currently too scary to deal with! 
- args[i] = null; // force IAE - } else { - args[i] = newRandomArg(random, paramType); - } - } - return args; - } - - static Object[] newCharFilterArgs(Random random, Reader reader, Class[] paramTypes) { - Object[] args = new Object[paramTypes.length]; - for (int i = 0; i < args.length; i++) { - Class paramType = paramTypes[i]; - if (paramType == Reader.class) { - args[i] = reader; - } else { - args[i] = newRandomArg(random, paramType); - } - } - return args; - } - - static Object[] newFilterArgs(Random random, TokenStream stream, Class[] paramTypes) { - Object[] args = new Object[paramTypes.length]; - for (int i = 0; i < args.length; i++) { - Class paramType = paramTypes[i]; - if (paramType == TokenStream.class) { - args[i] = stream; - } else if (paramType == CommonGramsFilter.class) { - // TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly - args[i] = new CommonGramsFilter(stream, newRandomArg(random, CharArraySet.class)); - } else { - args[i] = newRandomArg(random, paramType); - } - } - return args; - } - - static class MockRandomAnalyzer extends Analyzer { - final long seed; - - MockRandomAnalyzer(long seed) { - this.seed = seed; - } - - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Random random = new Random(seed); - TokenizerSpec tokenizerSpec = newTokenizer(random); - // System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString); - TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer); - // System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString); - return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream); - } - - @Override - protected Reader initReader(String fieldName, Reader reader) { - Random random = new Random(seed); - CharFilterSpec charfilterspec = newCharFilterChain(random, reader); - return charfilterspec.reader; - } - - @Override - public String toString() { - Random random = new Random(seed); - StringBuilder sb = new StringBuilder(); - CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader("")); - sb.append("\ncharfilters="); - sb.append(charFilterSpec.toString); - // intentional: initReader gets its own separate random - random = new Random(seed); - TokenizerSpec tokenizerSpec = newTokenizer(random); - sb.append("\n"); - sb.append("tokenizer="); - sb.append(tokenizerSpec.toString); - TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer); - sb.append("\n"); - sb.append("filters="); - sb.append(tokenFilterSpec.toString); - return sb.toString(); - } - - private T createComponent( - Constructor ctor, Object[] args, StringBuilder descr, boolean isConditional) { - try { - final T instance = ctor.newInstance(args); - /* - if (descr.length() > 0) { - descr.append(","); - } - */ - descr.append("\n "); - if (isConditional) { - descr.append("Conditional:"); - } - descr.append(ctor.getDeclaringClass().getName()); - String params = Arrays.deepToString(args); - params = params.substring(1, params.length() - 1); - descr.append("(").append(params).append(")"); - return instance; - } catch (InvocationTargetException ite) { - final Throwable cause = ite.getCause(); - if (cause instanceof IllegalArgumentException - || cause instanceof UnsupportedOperationException) { - // thats ok, ignore - if (VERBOSE) { - System.err.println("Ignoring IAE/UOE from ctor:"); - cause.printStackTrace(System.err); - } - } else { - Rethrow.rethrow(cause); - } - } catch (IllegalAccessException | 
InstantiationException iae) { - Rethrow.rethrow(iae); - } - return null; // no success - } - - private boolean broken(Constructor ctor, Object[] args) { - final Predicate pred = brokenConstructors.get(ctor); - return pred != null && pred.test(args); - } - - // create a new random tokenizer from classpath - private TokenizerSpec newTokenizer(Random random) { - TokenizerSpec spec = new TokenizerSpec(); - while (spec.tokenizer == null) { - final Constructor ctor = - tokenizers.get(random.nextInt(tokenizers.size())); - final StringBuilder descr = new StringBuilder(); - final Object[] args = newTokenizerArgs(random, ctor.getParameterTypes()); - if (broken(ctor, args)) { - continue; - } - spec.tokenizer = createComponent(ctor, args, descr, false); - if (spec.tokenizer != null) { - spec.toString = descr.toString(); - } - } - return spec; - } - - private CharFilterSpec newCharFilterChain(Random random, Reader reader) { - CharFilterSpec spec = new CharFilterSpec(); - spec.reader = reader; - StringBuilder descr = new StringBuilder(); - int numFilters = random.nextInt(3); - for (int i = 0; i < numFilters; i++) { - while (true) { - final Constructor ctor = - charfilters.get(random.nextInt(charfilters.size())); - final Object[] args = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes()); - if (broken(ctor, args)) { - continue; - } - reader = createComponent(ctor, args, descr, false); - if (reader != null) { - spec.reader = reader; - break; - } - } - } - spec.toString = descr.toString(); - return spec; - } - - private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer) { - TokenFilterSpec spec = new TokenFilterSpec(); - spec.stream = tokenizer; - StringBuilder descr = new StringBuilder(); - int numFilters = random.nextInt(5); - for (int i = 0; i < numFilters; i++) { - - // Insert ValidatingTF after each stage so we can - // catch problems right after the TF that "caused" - // them: - spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i); - - while (true) { - final Constructor ctor = - tokenfilters.get(random.nextInt(tokenfilters.size())); - if (random.nextBoolean() - && avoidConditionals.contains(ctor.getDeclaringClass()) == false) { - long seed = random.nextLong(); - spec.stream = - new ConditionalTokenFilter( - spec.stream, - in -> { - final Object[] args = newFilterArgs(random, in, ctor.getParameterTypes()); - if (broken(ctor, args)) { - return in; - } - TokenStream ts = createComponent(ctor, args, descr, true); - if (ts == null) { - return in; - } - return ts; - }) { - Random random = new Random(seed); - - @Override - public void reset() throws IOException { - super.reset(); - random = new Random(seed); - } - - @Override - protected boolean shouldFilter() throws IOException { - return random.nextBoolean(); - } - }; - break; - } else { - final Object[] args = newFilterArgs(random, spec.stream, ctor.getParameterTypes()); - if (broken(ctor, args)) { - continue; - } - final TokenFilter flt = createComponent(ctor, args, descr, false); - if (flt != null) { - spec.stream = flt; - break; - } - } - } - } - - // Insert ValidatingTF after each stage so we can - // catch problems right after the TF that "caused" - // them: - spec.stream = new ValidatingTokenFilter(spec.stream, "last stage"); - - spec.toString = descr.toString(); - return spec; - } - } - - static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter { - boolean readSomething; - - CheckThatYouDidntReadAnythingReaderWrapper(Reader in) { - super(in); - } - - @Override - public int correct(int 
currentOff) { - return currentOff; // we don't change any offsets - } - - @Override - public int read(char[] cbuf, int off, int len) throws IOException { - readSomething = true; - return input.read(cbuf, off, len); - } - - @Override - public int read() throws IOException { - readSomething = true; - return input.read(); - } - - @Override - public int read(CharBuffer target) throws IOException { - readSomething = true; - return input.read(target); - } - - @Override - public int read(char[] cbuf) throws IOException { - readSomething = true; - return input.read(cbuf); - } - - @Override - public long skip(long n) throws IOException { - readSomething = true; - return input.skip(n); - } - - @Override - public void mark(int readAheadLimit) throws IOException { - input.mark(readAheadLimit); - } - - @Override - public boolean markSupported() { - return input.markSupported(); - } - - @Override - public boolean ready() throws IOException { - return input.ready(); - } - - @Override - public void reset() throws IOException { - input.reset(); - } - } - - static class TokenizerSpec { - Tokenizer tokenizer; - String toString; - } - - static class TokenFilterSpec { - TokenStream stream; - String toString; - } - - static class CharFilterSpec { - Reader reader; - String toString; - } - - public void testRandomChains() throws Throwable { - int numIterations = TEST_NIGHTLY ? atLeast(20) : 3; - Random random = random(); - for (int i = 0; i < numIterations; i++) { - try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) { - if (VERBOSE) { - System.out.println("Creating random analyzer:" + a); - } - try { - checkNormalize(a); - checkRandomData( - random, - a, - 500 * RANDOM_MULTIPLIER, - 20, - false, - false /* We already validate our own offsets... */); - } catch (Throwable e) { - System.err.println("Exception from random analyzer: " + a); - throw e; - } - } - } - } - - public void checkNormalize(Analyzer a) { - // normalization should not modify characters that may be used for wildcards - // or regular expressions - String s = "([0-9]+)?*"; - assertEquals(s, a.normalize("dummy", s).utf8ToString()); - } - - // we might regret this decision... - public void testRandomChainsWithLargeStrings() throws Throwable { - int numIterations = TEST_NIGHTLY ? atLeast(20) : 3; - Random random = random(); - for (int i = 0; i < numIterations; i++) { - try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) { - if (VERBOSE) { - System.out.println("Creating random analyzer:" + a); - } - try { - checkRandomData( - random, - a, - 50 * RANDOM_MULTIPLIER, - 80, - false, - false /* We already validate our own offsets... 
*/); - } catch (Throwable e) { - System.err.println("Exception from random analyzer: " + a); - throw e; - } - } - } - } -} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java index 52854280c12..166b4b7b1ef 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java @@ -19,8 +19,8 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; /** Simple tests to ensure the keyword marker filter factory is working. */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java index 9e366bc4930..c581ab22f36 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilterFactory.java @@ -19,8 +19,8 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; /** Simple tests to ensure the stemmer override filter factory is working. 
*/ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java index 37006580507..543600155f0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java @@ -19,10 +19,10 @@ package org.apache.lucene.analysis.pattern; import org.apache.lucene.analysis.TokenFilterFactory; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; import org.apache.lucene.tests.analysis.CannedTokenStream; import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; /** This test just ensures the factory works */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballPorterFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballPorterFilterFactory.java index b55542a83d4..1340714fbee 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballPorterFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballPorterFilterFactory.java @@ -19,8 +19,8 @@ package org.apache.lucene.analysis.snowball; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; import org.tartarus.snowball.ext.EnglishStemmer; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestMultiWordSynonyms.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestMultiWordSynonyms.java index 7cd538c750b..c1024c34104 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestMultiWordSynonyms.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestMultiWordSynonyms.java @@ -19,8 +19,8 @@ package org.apache.lucene.analysis.synonym; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; /** @since solr 1.4 */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java index 8df8e4bfeb4..b42c7725182 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java @@ -22,8 +22,8 @@ import org.apache.lucene.analysis.TokenFilterFactory; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.pattern.PatternTokenizerFactory; -import 
org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.tests.analysis.BaseTokenStreamFactoryTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; @Deprecated diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java index b82bde51b8a..8f0ab4c5344 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilterFactory; import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.ResourceLoader; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilter.java index 5fc33753501..4a22cce3f12 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseCompletionFilter.java @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.IgnoreRandomChains; /** * A {@link org.apache.lucene.analysis.TokenFilter} that adds Japanese romanized tokens to the term @@ -54,6 +55,7 @@ import org.apache.lucene.util.CharsRefBuilder; * WIDTH NORMALIZATION IS NOT PERFORMED, THIS DOES NOT WORK AS EXPECTED. See also: {@link * JapaneseCompletionAnalyzer}. */ +@IgnoreRandomChains(reason = "LUCENE-10363: fails with incorrect offsets") public final class JapaneseCompletionFilter extends TokenFilter { public static final Mode DEFAULT_MODE = Mode.INDEX; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java index c2350c7c94a..2fdaffea38a 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseIterationMarkCharFilter.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.util.RollingCharBuffer; +import org.apache.lucene.util.IgnoreRandomChains; /** * Normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. @@ -36,6 +37,8 @@ import org.apache.lucene.analysis.util.RollingCharBuffer; * reached in order to not keep a copy of the character stream in memory. Vertical iteration marks, * which are even rarer than horizontal iteration marks in contemporary Japanese, are unsupported. 
*/ +@IgnoreRandomChains( + reason = "LUCENE-10358: fails with incorrect offsets or causes IndexOutOfBounds") public class JapaneseIterationMarkCharFilter extends CharFilter { /** Normalize kanji iteration marks by default */ diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java index 18b5ee1c930..9198a17388e 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java @@ -45,6 +45,9 @@ public final class JapaneseKatakanaStemFilter extends TokenFilter { public JapaneseKatakanaStemFilter(TokenStream input, int minimumLength) { super(input); + if (minimumLength < 1) { + throw new IllegalArgumentException("minimumLength must be >=1"); + } this.minimumKatakanaLength = minimumLength; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java index 70438022012..7b01751a3ae 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * A {@link TokenFilter} that normalizes Japanese numbers (kansūji) to regular Arabic decimal @@ -82,6 +83,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; *
<p>
Japanese formal numbers (daiji), accounting numbers and decimal fractions are currently not * supported. */ +@IgnoreRandomChains(reason = "LUCENE-10362: fails with incorrect offsets") public class JapaneseNumberFilter extends TokenFilter { private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java index bbc5ccf7335..47cb8d19297 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java @@ -41,6 +41,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.util.RollingCharBuffer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.IgnoreRandomChains; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.FST; @@ -275,6 +276,7 @@ public final class JapaneseTokenizer extends Tokenizer { * @param mode tokenization mode. * @lucene.experimental */ + @IgnoreRandomChains(reason = "Parameters are too complex to be tested") public JapaneseTokenizer( AttributeFactory factory, TokenInfoDictionary systemDictionary, diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java deleted file mode 100644 index d38acabea74..00000000000 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.analysis.ja; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import org.apache.lucene.util.ResourceLoader; - -/** Fake resource loader for tests: works if you want to fake reading a single file */ -class StringMockResourceLoader implements ResourceLoader { - String text; - - public StringMockResourceLoader(String text) { - this.text = text; - } - - @Override - public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) { - try { - return Class.forName(cname).asSubclass(expectedType); - } catch (Exception e) { - throw new RuntimeException("Cannot load class: " + cname, e); - } - } - - @Override - public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); - } -} diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestFactories.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestFactories.java index ee6232fa411..b05d5edadc3 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestFactories.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestFactories.java @@ -36,6 +36,7 @@ import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilte import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.tests.util.LuceneTestCase.Nightly; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.ResourceLoaderAware; import org.apache.lucene.util.Version; diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java index 470ce0c8f2d..80264951fab 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilterFactory.java @@ -22,6 +22,7 @@ import java.util.HashMap; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link JapaneseBaseFormFilterFactory} */ public class TestJapaneseBaseFormFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java index 6ac6dcc6981..9e456a5aa7d 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseIterationMarkCharFilterFactory.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.analysis.MockTokenizer; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link JapaneseIterationMarkCharFilterFactory} */ public class 
TestJapaneseIterationMarkCharFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java index 53d8e9c8d1b..aee5f75f03e 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilterFactory.java @@ -22,6 +22,7 @@ import java.util.HashMap; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link JapaneseKatakanaStemFilterFactory} */ public class TestJapaneseKatakanaStemFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilterFactory.java index 5b260d87e0d..ef91bab1a4c 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilterFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link org.apache.lucene.analysis.ja.JapaneseNumberFilterFactory} */ public class TestJapaneseNumberFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java index c3d403811db..02006ab8e82 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapanesePartOfSpeechStopFilterFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.ClasspathResourceLoader; import org.apache.lucene.util.Version; diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java index e5ed23c904f..cd4eb045c44 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilterFactory.java @@ -22,6 +22,7 @@ import java.util.HashMap; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link 
JapaneseReadingFormFilterFactory} */ public class TestJapaneseReadingFormFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java index 3c11270f5fc..fb11c4cd6d6 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizerFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link JapaneseTokenizerFactory} */ public class TestJapaneseTokenizerFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java index 648bb403ceb..18a82ba184a 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.IgnoreRandomChains; /** * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and @@ -73,6 +74,7 @@ public class MorfologikFilter extends TokenFilter { * @param in input token stream. * @param dict Dictionary to use for stemming. 
*/ + @IgnoreRandomChains(reason = "No dictionary support yet") public MorfologikFilter(final TokenStream in, final Dictionary dict) { super(in); this.input = in; diff --git a/lucene/analysis/nori/src/java/module-info.java b/lucene/analysis/nori/src/java/module-info.java index 9dd085b5a5d..77e67801524 100644 --- a/lucene/analysis/nori/src/java/module-info.java +++ b/lucene/analysis/nori/src/java/module-info.java @@ -28,6 +28,7 @@ module org.apache.lucene.analysis.nori { provides org.apache.lucene.analysis.TokenizerFactory with org.apache.lucene.analysis.ko.KoreanTokenizerFactory; provides org.apache.lucene.analysis.TokenFilterFactory with + org.apache.lucene.analysis.ko.KoreanNumberFilterFactory, org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilterFactory, org.apache.lucene.analysis.ko.KoreanReadingFormFilterFactory; } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java index 61ef959f27d..bc435aa4661 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanNumberFilter.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * A {@link TokenFilter} that normalizes Korean numbers to regular Arabic decimal numbers in @@ -72,6 +73,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; * * @lucene.experimental */ +@IgnoreRandomChains(reason = "LUCENE-10361: KoreanNumberFilter messes up offsets") public class KoreanNumberFilter extends TokenFilter { private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java index 0765b801c4d..325fae710b9 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java @@ -40,6 +40,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.util.RollingCharBuffer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.IgnoreRandomChains; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.FST; @@ -59,6 +60,7 @@ import org.apache.lucene.util.fst.FST; * * @lucene.experimental */ +@IgnoreRandomChains(reason = "LUCENE-10359: fails with incorrect offsets") public final class KoreanTokenizer extends Tokenizer { /** Token type reflecting the original source of this token */ @@ -205,6 +207,7 @@ public final class KoreanTokenizer extends Tokenizer { * @param discardPunctuation true if punctuation tokens should be dropped from the output. 
* @lucene.experimental */ + @IgnoreRandomChains(reason = "Parameters are too complex to be tested") public KoreanTokenizer( AttributeFactory factory, TokenInfoDictionary systemDictionary, diff --git a/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory index 4fff75330d2..cf903c1e04c 100644 --- a/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory +++ b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory @@ -13,5 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. +org.apache.lucene.analysis.ko.KoreanNumberFilterFactory org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilterFactory org.apache.lucene.analysis.ko.KoreanReadingFormFilterFactory \ No newline at end of file diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java deleted file mode 100644 index e29bfbef1cf..00000000000 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.analysis.ko; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import org.apache.lucene.util.ResourceLoader; - -/** Fake resource loader for tests: works if you want to fake reading a single file */ -class StringMockResourceLoader implements ResourceLoader { - private String text; - - public StringMockResourceLoader(String text) { - this.text = text; - } - - @Override - public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) { - try { - return Class.forName(cname).asSubclass(expectedType); - } catch (Exception e) { - throw new RuntimeException("Cannot load class: " + cname, e); - } - } - - @Override - public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); - } -} diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java index 2a519e8c723..9dc244a12a0 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanNumberFilterFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link org.apache.lucene.analysis.ko.KoreanNumberFilterFactory} */ public class TestKoreanNumberFilterFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java index 68fd7faa312..5a6c31dca32 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; import org.apache.lucene.util.Version; /** Simple tests for {@link KoreanPartOfSpeechStopFilterFactory} */ diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java index 46b910c92a3..a92aab1ef2a 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java @@ -21,6 +21,7 @@ import java.io.StringReader; import java.util.HashMap; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link KoreanReadingFormFilterFactory} */ public class TestKoreanReadingFormFilterFactory extends BaseTokenStreamTestCase { diff --git 
a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java index 93bd20d663e..63847cbebe8 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.tests.util.StringMockResourceLoader; /** Simple tests for {@link KoreanTokenizerFactory} */ public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase { diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java index 93d0c11b608..00932278337 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java @@ -27,12 +27,14 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; /** * Run OpenNLP chunker. Prerequisite: the OpenNLPTokenizer and OpenNLPPOSFilter must precede this * filter. Tags terms in the TypeAttribute, replacing the POS tags previously put there by * OpenNLPPOSFilter. */ +@IgnoreRandomChains(reason = "other filters must precede this one (see docs)") public final class OpenNLPChunkerFilter extends TokenFilter { private List<AttributeSource.State> sentenceTokenAttrs = new ArrayList<>(); diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java index 1e8e1d13938..af14f03cf21 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; /** * Runs OpenNLP dictionary-based and/or MaxEnt lemmatizers. @@ -41,6 +42,7 @@ import org.apache.lucene.util.AttributeSource; *
<p>
The dictionary file must be encoded as UTF-8, with one entry per line, in the form * word[tab]lemma[tab]part-of-speech */ +@IgnoreRandomChains(reason = "LUCENE-10352: no dictionary support yet") public class OpenNLPLemmatizerFilter extends TokenFilter { private final NLPLemmatizerOp lemmatizerOp; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java index f9c7bdd73a1..2cb3ab595fc 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java @@ -27,8 +27,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; /** Run OpenNLP POS tagger. Tags all terms in the TypeAttribute. */ +@IgnoreRandomChains(reason = "LUCENE-10352: add argument providers for this one") public final class OpenNLPPOSFilter extends TokenFilter { private List<AttributeSource.State> sentenceTokenAttrs = new ArrayList<>(); diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java index 134fa25d855..c31f5c11ea0 100644 --- a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java +++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java @@ -26,12 +26,14 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.util.SegmentingTokenizerBase; import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.IgnoreRandomChains; /** * Run OpenNLP SentenceDetector and Tokenizer. The last token in each sentence is marked by setting * the {@link #EOS_FLAG_BIT} in the FlagsAttribute; following filters can use this information to * apply operations to tokens one sentence at a time. 
*/ +@IgnoreRandomChains(reason = "LUCENE-10352: add argument providers for this one") public final class OpenNLPTokenizer extends SegmentingTokenizerBase { public static int EOS_FLAG_BIT = 1; diff --git a/lucene/analysis/phonetic/build.gradle b/lucene/analysis/phonetic/build.gradle index e5595cb2d76..2297af53c85 100644 --- a/lucene/analysis/phonetic/build.gradle +++ b/lucene/analysis/phonetic/build.gradle @@ -23,7 +23,7 @@ dependencies { moduleApi project(':lucene:core') moduleApi project(':lucene:analysis:common') - moduleImplementation 'commons-codec:commons-codec' + moduleApi 'commons-codec:commons-codec' testImplementation project(':lucene:test-framework') } diff --git a/lucene/analysis/phonetic/src/java/module-info.java b/lucene/analysis/phonetic/src/java/module-info.java index 706251af4ca..9bf5e641b51 100644 --- a/lucene/analysis/phonetic/src/java/module-info.java +++ b/lucene/analysis/phonetic/src/java/module-info.java @@ -26,6 +26,7 @@ module org.apache.lucene.analysis.phonetic { provides org.apache.lucene.analysis.TokenFilterFactory with org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory, + org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilterFactory, org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilterFactory, org.apache.lucene.analysis.phonetic.PhoneticFilterFactory; } diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java index 5e16e47298d..aa0dc1a8caf 100644 --- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java +++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.util.IgnoreRandomChains; /** * TokenFilter for Beider-Morse phonetic encoding. @@ -33,6 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; * @see BeiderMorseEncoder * @lucene.experimental */ +@IgnoreRandomChains( + reason = "LUCENE-10360: cannot handle empty tokens (or those only dashes and whitespace)") public final class BeiderMorseFilter extends TokenFilter { private final PhoneticEngine engine; private final LanguageSet languages; @@ -72,6 +75,7 @@ public final class BeiderMorseFilter extends TokenFilter { * @param languages optional Set of original languages. Can be null (which means it will be * guessed). 
*/ + @IgnoreRandomChains(reason = "LUCENE-10352: Add support for LanguageSet randomization") + public BeiderMorseFilter(TokenStream input, PhoneticEngine engine, LanguageSet languages) { super(input); this.engine = engine; diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java index e1f267a5d66..6a950d84cb4 100644 --- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java +++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java @@ -39,6 +39,9 @@ public final class DoubleMetaphoneFilter extends TokenFilter { */ public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) { super(input); + if (maxCodeLength < 1) { + throw new IllegalArgumentException("maxCodeLength must be >=1"); + } this.encoder.setMaxCodeLen(maxCodeLength); this.inject = inject; } diff --git a/lucene/analysis/phonetic/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory b/lucene/analysis/phonetic/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory index fe78873ce4f..677ae4829bf 100644 --- a/lucene/analysis/phonetic/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory +++ b/lucene/analysis/phonetic/src/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory @@ -14,5 +14,6 @@ # limitations under the License. org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory +org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilterFactory org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilterFactory org.apache.lucene.analysis.phonetic.PhoneticFilterFactory diff --git a/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java index f87ee8816ac..d3a6c24565a 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IgnoreRandomChains; /** * This class can be used if the token attributes of a TokenStream are intended to be consumed more @@ -31,6 +32,9 @@ import org.apache.lucene.util.AttributeSource; * although only before {@link #incrementToken()} is called the first time. Prior to Lucene 5, it * was never propagated. */ +@IgnoreRandomChains( + reason = + "doesn't actually reset itself! TODO: this statement is probably obsolete as of LUCENE-6121") public final class CachingTokenFilter extends TokenFilter { private List<AttributeSource.State> cache = null; private Iterator<AttributeSource.State> iterator = null; diff --git a/lucene/core/src/java/org/apache/lucene/util/IgnoreRandomChains.java b/lucene/core/src/java/org/apache/lucene/util/IgnoreRandomChains.java new file mode 100644 index 00000000000..f6f4c2a4860 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/IgnoreRandomChains.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to not test a class or constructor with {@code TestRandomChains} integration test. + * + * @lucene.internal + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.CONSTRUCTOR, ElementType.TYPE}) +public @interface IgnoreRandomChains { + /** A reason for ignoring should always be given. */ + String reason(); +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/StringMockResourceLoader.java similarity index 97% rename from lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java rename to lucene/test-framework/src/java/org/apache/lucene/tests/util/StringMockResourceLoader.java index 87764d6f526..d708ac75764 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/StringMockResourceLoader.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.analysis.util; +package org.apache.lucene.tests.util; import java.io.ByteArrayInputStream; import java.io.IOException; diff --git a/settings.gradle b/settings.gradle index ed641bf1a6b..0923e9db874 100644 --- a/settings.gradle +++ b/settings.gradle @@ -36,6 +36,7 @@ include "lucene:analysis:opennlp" include "lucene:analysis:phonetic" include "lucene:analysis:smartcn" include "lucene:analysis:stempel" +include "lucene:analysis.tests" include "lucene:backward-codecs" include "lucene:benchmark" include "lucene:classification"
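Notes on the changes above, with illustrative sketches. All class names in the sketches that do not appear in the patch are hypothetical.

The new @IgnoreRandomChains annotation is runtime-retained and targets types and constructors, so the TestRandomChains integration test can skip annotated components reflectively. A minimal sketch of such a check, assuming a helper like the following (the helper itself is not part of the patch):

import java.lang.reflect.Constructor;
import org.apache.lucene.util.IgnoreRandomChains;

final class IgnoreRandomChainsSupport {
  private IgnoreRandomChainsSupport() {}

  /** Returns the opt-out reason for a candidate constructor, or null if it should be tested. */
  static String ignoreReason(Constructor<?> ctor) {
    // A class-level annotation opts out every constructor of the component...
    IgnoreRandomChains ann = ctor.getDeclaringClass().getAnnotation(IgnoreRandomChains.class);
    if (ann == null) {
      // ...otherwise a single constructor may opt out on its own.
      ann = ctor.getAnnotation(IgnoreRandomChains.class);
    }
    return ann == null ? null : ann.reason();
  }
}

Targeting both ElementType.TYPE and ElementType.CONSTRUCTOR matches how the patch uses it: whole components (KoreanTokenizer, JapaneseNumberFilter) opt out at class level, while single hard-to-randomize constructors (JapaneseTokenizer, BeiderMorseFilter) opt out individually.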
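The new constructor checks in JapaneseKatakanaStemFilter and DoubleMetaphoneFilter surface bad arguments eagerly instead of at analysis time. A hedged test sketch, assuming JUnit 4.13's assertThrows and KeywordTokenizer from analysis-common as a throwaway input (the test class is illustrative, not part of the patch):

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilter;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
import org.junit.Assert;
import org.junit.Test;

public class TestCtorArgumentChecks {
  @Test
  public void testInvalidArgumentsAreRejectedEagerly() {
    // Both filters now validate their int argument in the constructor.
    Assert.assertThrows(
        IllegalArgumentException.class,
        () -> new JapaneseKatakanaStemFilter(new KeywordTokenizer(), 0));
    Assert.assertThrows(
        IllegalArgumentException.class,
        () -> new DoubleMetaphoneFilter(new KeywordTokenizer(), 0, true));
  }
}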
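The service entries added for KoreanNumberFilterFactory and DaitchMokotoffSoundexFilterFactory (in both META-INF/services and module-info) make the factories discoverable through Lucene's analysis SPI. A sketch of how registration can be verified; the SPI names are assumed to follow the factories' usual NAME constants:

import java.util.HashMap;
import org.apache.lucene.analysis.TokenFilterFactory;

public class CheckServiceEntries {
  public static void main(String[] args) {
    // Before this patch, both lookups failed with IllegalArgumentException
    // ("A SPI class ... does not exist") even though the factory classes shipped.
    for (String name : new String[] {"koreanNumber", "daitchMokotoffSoundex"}) {
      TokenFilterFactory factory = TokenFilterFactory.forName(name, new HashMap<>());
      System.out.println(name + " -> " + factory.getClass().getName());
    }
  }
}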
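With the per-module duplicates deleted, all tests use the single org.apache.lucene.tests.util.StringMockResourceLoader migrated to the test framework. Its typical use is feeding an in-memory "file" to a ResourceLoaderAware factory; an illustrative sketch (factory name and arguments are examples, not taken from the patch):

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.tests.util.StringMockResourceLoader;
import org.apache.lucene.util.ResourceLoaderAware;

public class LoaderExample {
  public static void main(String[] args) throws Exception {
    Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put("protected", "protwords.txt"); // name is arbitrary; the mock ignores it
    TokenFilterFactory factory = TokenFilterFactory.forName("keywordMarker", factoryArgs);
    // Every openResource(...) call returns the same UTF-8 string, so a one-file
    // word list can be faked without touching the filesystem.
    ((ResourceLoaderAware) factory).inform(new StringMockResourceLoader("dogs\ncats"));
  }
}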
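The new ModuleClassDiscovery helper (its body is not included in this excerpt) is what lets TestAllAnalyzersHaveFactories and TestRandomChains "discover classes to check from module system", per the CHANGES entry. The general JPMS mechanism such discovery can use, sketched with a stand-in module name:

import java.io.IOException;
import java.lang.module.ModuleReference;

public class ListModuleClasses {
  public static void main(String[] args) throws IOException {
    // Resolve a module from the boot layer and enumerate its .class entries.
    ModuleReference ref =
        ModuleLayer.boot().configuration()
            .findModule("java.logging") // any resolved module name works here
            .orElseThrow()
            .reference();
    try (var reader = ref.open()) {
      reader.list()
          .filter(entry -> entry.endsWith(".class") && !entry.endsWith("module-info.class"))
          .limit(5)
          .forEach(System.out::println);
    }
  }
}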
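Finally, the phonetic build change from moduleImplementation to moduleApi for commons-codec reflects that commons-codec types appear in the module's public signatures, so consumers must compile against them directly. A sketch of that leak, using the BeiderMorseFilter constructor shown in the patch (the PhoneticEngine configuration is an arbitrary example):

import org.apache.commons.codec.language.bm.NameType;
import org.apache.commons.codec.language.bm.PhoneticEngine;
import org.apache.commons.codec.language.bm.RuleType;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;

public class ApiLeakDemo {
  public static void main(String[] args) {
    Tokenizer tok = new WhitespaceTokenizer();
    // PhoneticEngine is a commons-codec type in BeiderMorseFilter's constructor,
    // so the dependency must be visible on the consumer's compile class/module path;
    // an implementation-only dependency would not compile here.
    PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
    new BeiderMorseFilter(tok, engine);
  }
}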