values = new ArrayList<>();
- SolrInputDocument sid = new SolrInputDocument();
- String id = "myid1";
- sid.addField("id", id);
- sid.addField("text", "some unique text");
- SolrInputDocumentWritable sidw = new SolrInputDocumentWritable(sid);
- values.add(sidw);
- reduceDriver.withInput(new Text(id), values);
-
- reduceDriver.withCacheArchive(solrHomeZip.getAbsolutePath());
-
- reduceDriver.withOutputFormat(SolrOutputFormat.class,
- NullInputFormat.class);
-
- reduceDriver.run();
-
- assertEquals("Expected 1 counter increment", 1,
- reduceDriver.getCounters().findCounter(SolrCounters.class.getName(),
- SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue());
- } finally {
- myReducer.cleanup(myReducer.context);
- }
- }
-
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java
deleted file mode 100644
index bc5148f5ae3..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/UtilsForTests.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
-import org.apache.solr.client.solrj.response.QueryResponse;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.junit.Assert.assertEquals;
-
-
-public class UtilsForTests {
-
- public static void validateSolrServerDocumentCount(File solrHomeDir, FileSystem fs, Path outDir, int expectedDocs, int expectedShards)
- throws IOException, SolrServerException {
-
- long actualDocs = 0;
- int actualShards = 0;
- for (FileStatus dir : fs.listStatus(outDir)) { // for each shard
- if (dir.getPath().getName().startsWith("part") && dir.isDirectory()) {
- actualShards++;
- try (EmbeddedSolrServer solr
- = SolrRecordWriter.createEmbeddedSolrServer(new Path(solrHomeDir.getAbsolutePath()), fs, dir.getPath())) {
- SolrQuery query = new SolrQuery();
- query.setQuery("*:*");
- QueryResponse resp = solr.query(query);
- long numDocs = resp.getResults().getNumFound();
- actualDocs += numDocs;
- }
- }
- }
- assertEquals(expectedShards, actualShards);
- assertEquals(expectedDocs, actualDocs);
- }
-
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java
deleted file mode 100644
index be5ea01cd29..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-
-/*
- * A simple interface for a client MR cluster used for testing. This interface
- * provides basic methods which are independent of the underlying Mini Cluster (
- * either through MR1 or MR2).
- */
-public interface MiniMRClientCluster {
-
- public void start() throws IOException;
-
- /**
- * Stop and start back the cluster using the same configuration.
- */
- public void restart() throws IOException;
-
- public void stop() throws IOException;
-
- public Configuration getConfig() throws IOException;
-
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java
deleted file mode 100644
index 2bf721b7a6c..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.JarFinder;
-
-/**
- * A MiniMRCluster factory. In MR2, it provides a wrapper MiniMRClientCluster
- * interface around the MiniMRYarnCluster. While in MR1, it provides such
- * wrapper around MiniMRCluster. This factory should be used in tests to provide
- * an easy migration of tests across MR1 and MR2.
- */
-public class MiniMRClientClusterFactory {
-
- public static MiniMRClientCluster create(Class> caller, int noOfNMs,
- Configuration conf, File testWorkDir) throws IOException {
- return create(caller, caller.getSimpleName(), noOfNMs, conf, testWorkDir);
- }
-
- public static MiniMRClientCluster create(Class> caller, String identifier,
- int noOfNMs, Configuration conf, File testWorkDir) throws IOException {
-
- if (conf == null) {
- conf = new Configuration();
- }
-
- FileSystem fs = FileSystem.get(conf);
-
- Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir")
- .makeQualified(fs);
- Path appJar = new Path(testRootDir, "MRAppJar.jar");
-
- // Copy MRAppJar and make it private.
- Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);
-
- fs.copyFromLocalFile(appMasterJar, appJar);
- fs.setPermission(appJar, new FsPermission("744"));
-
- Job job = Job.getInstance(conf);
-
- job.addFileToClassPath(appJar);
-
- Path callerJar = new Path(JarFinder.getJar(caller));
- Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
- fs.copyFromLocalFile(callerJar, remoteCallerJar);
- fs.setPermission(remoteCallerJar, new FsPermission("744"));
- job.addFileToClassPath(remoteCallerJar);
-
- MiniMRYarnCluster miniMRYarnCluster;
- try {
- miniMRYarnCluster = new MiniMRYarnCluster(identifier,
- noOfNMs, testWorkDir);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- job.getConfiguration().set("minimrclientcluster.caller.name",
- identifier);
- job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number",
- noOfNMs);
- miniMRYarnCluster.init(job.getConfiguration());
- miniMRYarnCluster.start();
-
- return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
- }
-
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
deleted file mode 100644
index cf872abdb1c..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobID;
-import org.apache.hadoop.mapred.JobPriority;
-import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
-import org.apache.hadoop.mapred.TaskCompletionEvent;
-import org.apache.hadoop.security.AccessControlException;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.lucene.util.LuceneTestCase;
-
-
-/**
- * This class is an MR2 replacement for older MR1 MiniMRCluster, that was used
- * by tests prior to MR2. This replacement class uses the new MiniMRYarnCluster
- * in MR2 but provides the same old MR1 interface, so tests can be migrated from
- * MR1 to MR2 with minimal changes.
- *
- * Due to major differences between MR1 and MR2, a number of methods are either
- * unimplemented/unsupported or were re-implemented to provide wrappers around
- * MR2 functionality.
- *
- * @deprecated Use {@link org.apache.hadoop.mapred.MiniMRClientClusterFactory}
- * instead
- */
-@Deprecated
-public class MiniMRCluster {
- private static final Log LOG = LogFactory.getLog(MiniMRCluster.class);
-
- private MiniMRClientCluster mrClientCluster;
-
- public String getTaskTrackerLocalDir(int taskTracker) {
- throw new UnsupportedOperationException();
- }
-
- public String[] getTaskTrackerLocalDirs(int taskTracker) {
- throw new UnsupportedOperationException();
- }
-
- class JobTrackerRunner {
- // Mock class
- }
-
- class TaskTrackerRunner {
- // Mock class
- }
-
- public JobTrackerRunner getJobTrackerRunner() {
- throw new UnsupportedOperationException();
- }
-
- TaskTrackerRunner getTaskTrackerRunner(int id) {
- throw new UnsupportedOperationException();
- }
-
- public int getNumTaskTrackers() {
- throw new UnsupportedOperationException();
- }
-
- public void setInlineCleanupThreads() {
- throw new UnsupportedOperationException();
- }
-
- public void waitUntilIdle() {
- throw new UnsupportedOperationException();
- }
-
- private void waitTaskTrackers() {
- throw new UnsupportedOperationException();
- }
-
- public int getJobTrackerPort() {
- throw new UnsupportedOperationException();
- }
-
- public JobConf createJobConf() {
- JobConf jobConf = null;
- try {
- jobConf = new JobConf(mrClientCluster.getConfig());
- } catch (IOException e) {
- LOG.error(e);
- }
- return jobConf;
- }
-
- public JobConf createJobConf(JobConf conf) {
- JobConf jobConf = null;
- try {
- jobConf = new JobConf(mrClientCluster.getConfig());
- } catch (IOException e) {
- LOG.error(e);
- }
- return jobConf;
- }
-
- static JobConf configureJobConf(JobConf conf, String namenode,
- int jobTrackerPort, int jobTrackerInfoPort, UserGroupInformation ugi) {
- throw new UnsupportedOperationException();
- }
-
- public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
- String[] racks, String[] hosts) throws Exception {
- this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts);
- }
-
- public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
- String[] racks, String[] hosts, JobConf conf) throws Exception {
- this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts, null, conf);
- }
-
- public MiniMRCluster(int numTaskTrackers, String namenode, int numDir)
- throws Exception {
- this(0, 0, numTaskTrackers, namenode, numDir);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir) throws Exception {
- this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
- null);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir, String[] racks)
- throws Exception {
- this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
- racks, null);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir, String[] racks,
- String[] hosts) throws Exception {
- this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
- racks, hosts, null);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir, String[] racks,
- String[] hosts, UserGroupInformation ugi) throws Exception {
- this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
- racks, hosts, ugi, null);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir, String[] racks,
- String[] hosts, UserGroupInformation ugi, JobConf conf)
- throws Exception {
- this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
- racks, hosts, ugi, conf, 0);
- }
-
- public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
- int numTaskTrackers, String namenode, int numDir, String[] racks,
- String[] hosts, UserGroupInformation ugi, JobConf conf,
- int numTrackerToExclude) throws Exception {
- if (conf == null) conf = new JobConf();
- FileSystem.setDefaultUri(conf, namenode);
- String identifier = this.getClass().getSimpleName() + "_"
- + Integer.toString(LuceneTestCase.random().nextInt(Integer.MAX_VALUE));
- mrClientCluster = MiniMRClientClusterFactory.create(this.getClass(),
- identifier, numTaskTrackers, conf, new File(conf.get("testWorkDir")));
- }
-
- public UserGroupInformation getUgi() {
- throw new UnsupportedOperationException();
- }
-
- public TaskCompletionEvent[] getTaskCompletionEvents(JobID id, int from,
- int max) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public void setJobPriority(JobID jobId, JobPriority priority)
- throws AccessControlException, IOException {
- throw new UnsupportedOperationException();
- }
-
- public JobPriority getJobPriority(JobID jobId) {
- throw new UnsupportedOperationException();
- }
-
- public long getJobFinishTime(JobID jobId) {
- throw new UnsupportedOperationException();
- }
-
- public void initializeJob(JobID jobId) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public MapTaskCompletionEventsUpdate getMapTaskCompletionEventsUpdates(
- int index, JobID jobId, int max) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public JobConf getJobTrackerConf() {
- JobConf jobConf = null;
- try {
- jobConf = new JobConf(mrClientCluster.getConfig());
- } catch (IOException e) {
- LOG.error(e);
- }
- return jobConf;
- }
-
- public int getFaultCount(String hostName) {
- throw new UnsupportedOperationException();
- }
-
- public void startJobTracker() {
- // Do nothing
- }
-
- public void startJobTracker(boolean wait) {
- // Do nothing
- }
-
- public void stopJobTracker() {
- // Do nothing
- }
-
- public void stopTaskTracker(int id) {
- // Do nothing
- }
-
- public void startTaskTracker(String host, String rack, int idx, int numDir)
- throws IOException {
- // Do nothing
- }
-
- void addTaskTracker(TaskTrackerRunner taskTracker) {
- throw new UnsupportedOperationException();
- }
-
- int getTaskTrackerID(String trackerName) {
- throw new UnsupportedOperationException();
- }
-
- public void shutdown() {
- try {
- mrClientCluster.stop();
- } catch (IOException e) {
- LOG.error(e);
- }
- }
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
deleted file mode 100644
index 8fa1b3132bc..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Locale;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeys;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.LocalContainerLauncher;
-import org.apache.hadoop.mapred.ShuffleHandler;
-import org.apache.hadoop.mapreduce.MRConfig;
-import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
-import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
-import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
-import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.service.Service;
-import org.apache.hadoop.util.JarFinder;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
-import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
-
-/**
- * Configures and starts the MR-specific components in the YARN cluster.
- *
- */
-public class MiniMRYarnCluster extends MiniYARNCluster {
-
- public static final String APPJAR = JarFinder.getJar(LocalContainerLauncher.class);
-
- private static final Log LOG = LogFactory.getLog(MiniMRYarnCluster.class);
- private JobHistoryServer historyServer;
- private JobHistoryServerWrapper historyServerWrapper;
-
- public MiniMRYarnCluster(String testName, File testWorkDir) {
- this(testName, 1, testWorkDir);
- }
-
- public MiniMRYarnCluster(String testName, int noOfNMs, File testWorkDir) {
- super(testName, noOfNMs, 4, 4, testWorkDir);
- //TODO: add the history server
- historyServerWrapper = new JobHistoryServerWrapper();
- addService(historyServerWrapper);
- }
-
- @Override
- public void serviceInit(Configuration conf) throws Exception {
- conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
- if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
- conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(),
- "apps_staging_dir/").getAbsolutePath());
- }
-
- // By default, VMEM monitoring disabled, PMEM monitoring enabled.
- if (!conf.getBoolean(
- MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
- MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
- conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
- conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
- }
-
- conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");
-
- try {
- Path stagingPath = FileContext.getFileContext(conf).makeQualified(
- new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
- /*
- * Re-configure the staging path on Windows if the file system is localFs.
- * We need to use a absolute path that contains the drive letter. The unit
- * test could run on a different drive than the AM. We can run into the
- * issue that job files are localized to the drive where the test runs on,
- * while the AM starts on a different drive and fails to find the job
- * metafiles. Using absolute path can avoid this ambiguity.
- */
- if (Path.WINDOWS) {
- if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
- conf.set(MRJobConfig.MR_AM_STAGING_DIR,
- new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR))
- .getAbsolutePath());
- }
- }
- FileContext fc=FileContext.getFileContext(stagingPath.toUri(), conf);
- if (fc.util().exists(stagingPath)) {
- LOG.info(stagingPath + " exists! deleting...");
- fc.delete(stagingPath, true);
- }
- LOG.info("mkdir: " + stagingPath);
- //mkdir the staging directory so that right permissions are set while running as proxy user
- fc.mkdir(stagingPath, null, true);
- //mkdir done directory as well
- String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
- Path doneDirPath = fc.makeQualified(new Path(doneDir));
- fc.mkdir(doneDirPath, null, true);
- } catch (IOException e) {
- throw new YarnRuntimeException("Could not create staging directory. ", e);
- }
- conf.set(MRConfig.MASTER_ADDRESS, "test"); // The default is local because of
- // which shuffle doesn't happen
- //configure the shuffle service in NM
- conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
- new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
- conf.setClass(String.format(Locale.ENGLISH, YarnConfiguration.NM_AUX_SERVICE_FMT,
- ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class,
- Service.class);
-
- // Non-standard shuffle port
- conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
-
- conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
- DefaultContainerExecutor.class, ContainerExecutor.class);
-
- // TestMRJobs is for testing non-uberized operation only; see TestUberAM
- // for corresponding uberized tests.
- conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
-
- super.serviceInit(conf);
- }
-
- private class JobHistoryServerWrapper extends AbstractService {
- public JobHistoryServerWrapper() {
- super(JobHistoryServerWrapper.class.getName());
- }
-
- @Override
- public synchronized void serviceStart() throws Exception {
- try {
- if (!getConfig().getBoolean(
- JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS,
- JHAdminConfig.DEFAULT_MR_HISTORY_MINICLUSTER_FIXED_PORTS)) {
- // pick free random ports.
- getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- }
- historyServer = new JobHistoryServer();
- historyServer.init(getConfig());
- new Thread() {
- public void run() {
- historyServer.start();
- };
- }.start();
- while (historyServer.getServiceState() == STATE.INITED) {
- LOG.info("Waiting for HistoryServer to start...");
- Thread.sleep(1500);
- }
- //TODO Add a timeout. State.STOPPED check ?
- if (historyServer.getServiceState() != STATE.STARTED) {
- throw new IOException("HistoryServer failed to start");
- }
- super.serviceStart();
- } catch (Throwable t) {
- throw new YarnRuntimeException(t);
- }
- //need to do this because historyServer.init creates a new Configuration
- getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
- historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
- getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
- historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
-
- LOG.info("MiniMRYARN ResourceManager address: " +
- getConfig().get(YarnConfiguration.RM_ADDRESS));
- LOG.info("MiniMRYARN ResourceManager web address: " +
- getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS));
- LOG.info("MiniMRYARN HistoryServer address: " +
- getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
- LOG.info("MiniMRYARN HistoryServer web address: " +
- getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
- }
-
- @Override
- public synchronized void serviceStop() throws Exception {
- if (historyServer != null) {
- historyServer.stop();
- }
- super.serviceStop();
- }
- }
-
- public JobHistoryServer getHistoryServer() {
- return this.historyServer;
- }
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
deleted file mode 100644
index 08ab881005b..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
-import org.apache.hadoop.service.Service.STATE;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-
-/**
- * An adapter for MiniMRYarnCluster providing a MiniMRClientCluster interface.
- * This interface could be used by tests across both MR1 and MR2.
- */
-public class MiniMRYarnClusterAdapter implements MiniMRClientCluster {
-
- private MiniMRYarnCluster miniMRYarnCluster;
-
- private File testWorkDir;
-
- private static final Log LOG = LogFactory.getLog(MiniMRYarnClusterAdapter.class);
-
- public MiniMRYarnClusterAdapter(MiniMRYarnCluster miniMRYarnCluster, File testWorkDir) {
- this.miniMRYarnCluster = miniMRYarnCluster;
- this.testWorkDir = testWorkDir;
- }
-
- @Override
- public Configuration getConfig() {
- return miniMRYarnCluster.getConfig();
- }
-
- @Override
- public void start() {
- miniMRYarnCluster.start();
- }
-
- @Override
- public void stop() {
- miniMRYarnCluster.stop();
- }
-
- @Override
- public void restart() {
- if (!miniMRYarnCluster.getServiceState().equals(STATE.STARTED)){
- LOG.warn("Cannot restart the mini cluster, start it first");
- return;
- }
- Configuration oldConf = new Configuration(getConfig());
- String callerName = oldConf.get("minimrclientcluster.caller.name",
- this.getClass().getName());
- int noOfNMs = oldConf.getInt("minimrclientcluster.nodemanagers.number", 1);
- oldConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
- oldConf.setBoolean(JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS, true);
- stop();
- miniMRYarnCluster = new MiniMRYarnCluster(callerName, noOfNMs, testWorkDir);
- miniMRYarnCluster.init(oldConf);
- miniMRYarnCluster.start();
- }
-
-}
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
deleted file mode 100644
index 6931f87e2ce..00000000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.hack;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.Locale;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
-import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.service.CompositeService;
-import org.apache.hadoop.util.Shell;
-import org.apache.hadoop.util.Shell.ShellCommandExecutor;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.api.ResourceTracker;
-import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
-import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
-import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
-import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
-import org.apache.hadoop.yarn.server.nodemanager.Context;
-import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
-import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
-import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
-import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-
-public class MiniYARNCluster extends CompositeService {
-
- private static final Log LOG = LogFactory.getLog(MiniYARNCluster.class);
-
- // temp fix until metrics system can auto-detect itself running in unit test:
- static {
- DefaultMetricsSystem.setMiniClusterMode(true);
- }
-
- private NodeManager[] nodeManagers;
- private ResourceManager resourceManager;
-
- private ResourceManagerWrapper resourceManagerWrapper;
-
- private File testWorkDir;
-
- // Number of nm-local-dirs per nodemanager
- private int numLocalDirs;
- // Number of nm-log-dirs per nodemanager
- private int numLogDirs;
-
- /**
- * @param testName name of the test
- * @param noOfNodeManagers the number of node managers in the cluster
- * @param numLocalDirs the number of nm-local-dirs per nodemanager
- * @param numLogDirs the number of nm-log-dirs per nodemanager
- */
- public MiniYARNCluster(String testName, int noOfNodeManagers,
- int numLocalDirs, int numLogDirs, File testWorkDir) {
- super(testName.replace("$", ""));
- this.numLocalDirs = numLocalDirs;
- this.numLogDirs = numLogDirs;
- String testSubDir = testName.replace("$", "");
- File targetWorkDir = new File(testWorkDir, testSubDir);
- try {
- FileContext.getLocalFSFileContext().delete(
- new Path(targetWorkDir.getAbsolutePath()), true);
- } catch (Exception e) {
- LOG.warn("COULD NOT CLEANUP", e);
- throw new YarnRuntimeException("could not cleanup test dir: "+ e, e);
- }
-
- if (Shell.WINDOWS) {
- // The test working directory can exceed the maximum path length supported
- // by some Windows APIs and cmd.exe (260 characters). To work around this,
- // create a symlink in temporary storage with a much shorter path,
- // targeting the full path to the test working directory. Then, use the
- // symlink as the test working directory.
- String targetPath = targetWorkDir.getAbsolutePath();
- File link = new File(System.getProperty("java.io.tmpdir"),
- String.valueOf(System.nanoTime()));
- String linkPath = link.getAbsolutePath();
-
- try {
- FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
- } catch (IOException e) {
- throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
- }
-
- // Guarantee target exists before creating symlink.
- targetWorkDir.mkdirs();
-
- ShellCommandExecutor shexec = new ShellCommandExecutor(
- Shell.getSymlinkCommand(targetPath, linkPath));
- try {
- shexec.execute();
- } catch (IOException e) {
- throw new YarnRuntimeException(String.format(Locale.ENGLISH,
- "failed to create symlink from %s to %s, shell output: %s", linkPath,
- targetPath, shexec.getOutput()), e);
- }
-
- this.testWorkDir = link;
- } else {
- this.testWorkDir = targetWorkDir;
- }
-
- resourceManagerWrapper = new ResourceManagerWrapper();
- addService(resourceManagerWrapper);
- nodeManagers = new CustomNodeManager[noOfNodeManagers];
- for(int index = 0; index < noOfNodeManagers; index++) {
- addService(new NodeManagerWrapper(index));
- nodeManagers[index] = new CustomNodeManager();
- }
- }
-
- @Override
- public void serviceInit(Configuration conf) throws Exception {
- super.serviceInit(conf instanceof YarnConfiguration ? conf
- : new YarnConfiguration(
- conf));
- }
-
- public File getTestWorkDir() {
- return testWorkDir;
- }
-
- public ResourceManager getResourceManager() {
- return this.resourceManager;
- }
-
- public NodeManager getNodeManager(int i) {
- return this.nodeManagers[i];
- }
-
- public static String getHostname() {
- try {
- return InetAddress.getLocalHost().getHostName();
- }
- catch (UnknownHostException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- private class ResourceManagerWrapper extends AbstractService {
- public ResourceManagerWrapper() {
- super(ResourceManagerWrapper.class.getName());
- }
-
- @Override
- public synchronized void serviceStart() throws Exception {
- try {
- getConfig().setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true);
- if (!getConfig().getBoolean(
- YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS,
- YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) {
- // pick free random ports.
- String hostname = MiniYARNCluster.getHostname();
- getConfig().set(YarnConfiguration.RM_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_ADMIN_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_SCHEDULER_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
- hostname + ":0");
- getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS,
- hostname + ":0");
- }
- resourceManager = new ResourceManager() {
- @Override
- protected void doSecureLogin() throws IOException {
- // Don't try to login using keytab in the testcase.
- };
- };
- resourceManager.init(getConfig());
- new Thread() {
- public void run() {
- resourceManager.start();
- };
- }.start();
- int waitCount = 0;
- while (resourceManager.getServiceState() == STATE.INITED
- && waitCount++ < 60) {
- LOG.info("Waiting for RM to start...");
- Thread.sleep(1500);
- }
- if (resourceManager.getServiceState() != STATE.STARTED) {
- // RM could have failed.
- throw new IOException(
- "ResourceManager failed to start. Final state is "
- + resourceManager.getServiceState());
- }
- super.serviceStart();
- } catch (Throwable t) {
- throw new YarnRuntimeException(t);
- }
- LOG.info("MiniYARN ResourceManager address: " +
- getConfig().get(YarnConfiguration.RM_ADDRESS));
- LOG.info("MiniYARN ResourceManager web address: " +
- getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS));
- }
-
- @Override
- public synchronized void serviceStop() throws Exception {
- if (resourceManager != null) {
- resourceManager.stop();
- }
- super.serviceStop();
-
- if (Shell.WINDOWS) {
- // On Windows, clean up the short temporary symlink that was created to
- // work around path length limitation.
- String testWorkDirPath = testWorkDir.getAbsolutePath();
- try {
- FileContext.getLocalFSFileContext().delete(new Path(testWorkDirPath),
- true);
- } catch (IOException e) {
- LOG.warn("could not cleanup symlink: " +
- testWorkDir.getAbsolutePath());
- }
- }
- }
- }
-
- private class NodeManagerWrapper extends AbstractService {
- int index = 0;
-
- public NodeManagerWrapper(int i) {
- super(NodeManagerWrapper.class.getName() + "_" + i);
- index = i;
- }
-
- public synchronized void serviceInit(Configuration conf) throws Exception {
- Configuration config = new YarnConfiguration(conf);
- super.serviceInit(config);
- }
-
- /**
- * Create local/log directories
- * @param dirType type of directories i.e. local dirs or log dirs
- * @param numDirs number of directories
- * @return the created directories as a comma delimited String
- */
- private String prepareDirs(String dirType, int numDirs) {
- File []dirs = new File[numDirs];
- String dirsString = "";
- for (int i = 0; i < numDirs; i++) {
- dirs[i]= new File(testWorkDir, MiniYARNCluster.this.getName()
- + "-" + dirType + "Dir-nm-" + index + "_" + i);
- dirs[i].mkdirs();
- LOG.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath());
- String delimiter = (i > 0) ? "," : "";
- dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath());
- }
- return dirsString;
- }
-
- public synchronized void serviceStart() throws Exception {
- try {
- // create nm-local-dirs and configure them for the nodemanager
- String localDirsString = prepareDirs("local", numLocalDirs);
- getConfig().set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString);
- // create nm-log-dirs and configure them for the nodemanager
- String logDirsString = prepareDirs("log", numLogDirs);
- getConfig().set(YarnConfiguration.NM_LOG_DIRS, logDirsString);
-
- File remoteLogDir =
- new File(testWorkDir, MiniYARNCluster.this.getName()
- + "-remoteLogDir-nm-" + index);
- remoteLogDir.mkdir();
- getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
- remoteLogDir.getAbsolutePath());
- // By default AM + 2 containers
- getConfig().setInt(YarnConfiguration.NM_PMEM_MB, 4*1024);
- getConfig().set(YarnConfiguration.NM_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- getConfig().set(YarnConfiguration.NM_LOCALIZER_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
- getConfig().set(YarnConfiguration.NM_WEBAPP_ADDRESS,
- MiniYARNCluster.getHostname() + ":0");
-
- // Disable resource checks by default
- if (!getConfig().getBoolean(
- YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
- YarnConfiguration.
- DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
- getConfig().setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
- getConfig().setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
- }
-
- LOG.info("Starting NM: " + index);
- nodeManagers[index].init(getConfig());
- new Thread() {
- public void run() {
- nodeManagers[index].start();
- };
- }.start();
- int waitCount = 0;
- while (nodeManagers[index].getServiceState() == STATE.INITED
- && waitCount++ < 60) {
- LOG.info("Waiting for NM " + index + " to start...");
- Thread.sleep(1000);
- }
- if (nodeManagers[index].getServiceState() != STATE.STARTED) {
- // RM could have failed.
- throw new IOException("NodeManager " + index + " failed to start");
- }
- super.serviceStart();
- } catch (Throwable t) {
- throw new YarnRuntimeException(t);
- }
- }
-
- @Override
- public synchronized void serviceStop() throws Exception {
- if (nodeManagers[index] != null) {
- nodeManagers[index].stop();
- }
- super.serviceStop();
- }
- }
-
- private class CustomNodeManager extends NodeManager {
- @Override
- protected void doSecureLogin() throws IOException {
- // Don't try to login using keytab in the testcase.
- };
-
- @Override
- protected NodeStatusUpdater createNodeStatusUpdater(Context context,
- Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
- return new NodeStatusUpdaterImpl(context, dispatcher,
- healthChecker, metrics) {
- @Override
- protected ResourceTracker getRMClient() {
- final ResourceTrackerService rt = resourceManager
- .getResourceTrackerService();
- final RecordFactory recordFactory =
- RecordFactoryProvider.getRecordFactory(null);
-
- // For in-process communication without RPC
- return new ResourceTracker() {
-
- @Override
- public NodeHeartbeatResponse nodeHeartbeat(
- NodeHeartbeatRequest request) throws YarnException,
- IOException {
- NodeHeartbeatResponse response = recordFactory.newRecordInstance(
- NodeHeartbeatResponse.class);
- try {
- response = rt.nodeHeartbeat(request);
- } catch (YarnException e) {
- LOG.info("Exception in heartbeat from node " +
- request.getNodeStatus().getNodeId(), e);
- throw e;
- }
- return response;
- }
-
- @Override
- public RegisterNodeManagerResponse registerNodeManager(
- RegisterNodeManagerRequest request)
- throws YarnException, IOException {
- RegisterNodeManagerResponse response = recordFactory.
- newRecordInstance(RegisterNodeManagerResponse.class);
- try {
- response = rt.registerNodeManager(request);
- } catch (YarnException e) {
- LOG.info("Exception in node registration from "
- + request.getNodeId().toString(), e);
- throw e;
- }
- return response;
- }
- };
- };
-
- @Override
- protected void stopRMProxy() {
- return;
- }
- };
- };
- }
-}
diff --git a/solr/contrib/morphlines-cell/README.txt b/solr/contrib/morphlines-cell/README.txt
deleted file mode 100644
index a3a1ba971e2..00000000000
--- a/solr/contrib/morphlines-cell/README.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Solr Morphlines-Cell
-
-*Experimental* - This contrib is currently subject to change in ways that may
-break back compatibility.
-
-This contrib provides a variety of Kite Morphlines features for Solr Cell type functionality.
\ No newline at end of file
diff --git a/solr/contrib/morphlines-cell/build.xml b/solr/contrib/morphlines-cell/build.xml
deleted file mode 100644
index 397472d3e00..00000000000
--- a/solr/contrib/morphlines-cell/build.xml
+++ /dev/null
@@ -1,144 +0,0 @@
-
-
-
-
-
-
-
- Solr Cell Morphline commands.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-cell/ivy.xml b/solr/contrib/morphlines-cell/ivy.xml
deleted file mode 100644
index c090f2174f9..00000000000
--- a/solr/contrib/morphlines-cell/ivy.xml
+++ /dev/null
@@ -1,35 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
deleted file mode 100644
index d4483a546b6..00000000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.IllformedLocaleException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-import java.util.stream.Collectors;
-
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.io.Closeables;
-import com.typesafe.config.Config;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.common.params.MultiMapSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.handler.extraction.ExtractingParams;
-import org.apache.solr.handler.extraction.ExtractionDateUtil;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.morphlines.solr.SolrLocator;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.tika.sax.xpath.Matcher;
-import org.apache.tika.sax.xpath.MatchingContentHandler;
-import org.apache.tika.sax.xpath.XPathParser;
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.Configs;
-import org.kitesdk.morphline.base.Fields;
-import org.kitesdk.morphline.stdio.AbstractParser;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
- * the Tika output back to a record using SolrCell.
- *
- * The Tika parser is chosen from the configurable list of parsers, depending on the MIME type
- * specified in the input record. Typically, this requires an upstream DetectMimeTypeBuilder
- * in a prior command.
- */
-public final class SolrCellBuilder implements CommandBuilder {
-
- @Override
- public Collection getNames() {
- return Collections.singletonList("solrCell");
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new SolrCell(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class SolrCell extends AbstractParser {
-
- private final IndexSchema schema;
- private final List dateFormats;
- private final String xpathExpr;
- private final List parsers = new ArrayList<>();
- private final SolrContentHandlerFactory solrContentHandlerFactory;
- private final Locale locale;
-
- private final SolrParams solrParams;
- private final Map mediaTypeToParserMap;
-
- private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
-
- public static final String ADDITIONAL_SUPPORTED_MIME_TYPES = "additionalSupportedMimeTypes";
-
- public SolrCell(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
-
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- this.schema = Objects.requireNonNull(locator.getIndexSchema());
- if (LOG.isTraceEnabled()) {
- LOG.trace("Solr schema: \n" + schema.getFields().entrySet().stream()
- .sorted(Map.Entry.comparingByKey()).map(Map.Entry::getValue).map(Object::toString)
- .collect(Collectors.joining("\n")));
- }
-
- ListMultimap cellParams = ArrayListMultimap.create();
- String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null);
- if (uprefix != null) {
- cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix);
- }
- for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.emptyList())) {
- cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture);
- }
- Config fmapConfig = getConfigs().getConfig(config, "fmap", null);
- if (fmapConfig != null) {
- for (Map.Entry entry : new Configs().getEntrySet(fmapConfig)) {
- cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString());
- }
- }
- String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null);
- if (captureAttributes != null) {
- cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes);
- }
- String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null);
- if (lowerNames != null) {
- cellParams.put(ExtractingParams.LOWERNAMES, lowerNames);
- }
- String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null);
- if (defaultField != null) {
- cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField);
- }
- xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null);
- if (xpathExpr != null) {
- cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
- }
-
- this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
-
- String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
- Class extends SolrContentHandlerFactory> factoryClass;
- try {
- factoryClass = Class.forName(handlerStr).asSubclass(SolrContentHandlerFactory.class);
- } catch (ClassNotFoundException cnfe) {
- throw new MorphlineCompilationException("Could not find class "
- + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe);
- }
- this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
-
- this.locale = getLocale(getConfigs().getString(config, "locale", null));
-
- this.mediaTypeToParserMap = new HashMap<>();
- //MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
-
- List extends Config> parserConfigs = getConfigs().getConfigList(config, "parsers");
- for (Config parserConfig : parserConfigs) {
- String parserClassName = getConfigs().getString(parserConfig, "parser");
-
- Object obj;
- try {
- obj = Class.forName(parserClassName).newInstance();
- } catch (Throwable e) {
- throw new MorphlineCompilationException("Cannot instantiate Tika parser: " + parserClassName, config, e);
- }
- if (!(obj instanceof Parser)) {
- throw new MorphlineCompilationException("Tika parser " + obj.getClass().getName()
- + " must be an instance of class " + Parser.class.getName(), config);
- }
- Parser parser = (Parser) obj;
- this.parsers.add(parser);
-
- List mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.emptyList());
- for (String mediaTypeStr : mediaTypes) {
- MediaType mediaType = parseMediaType(mediaTypeStr);
- addSupportedMimeType(mediaTypeStr);
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
-
- if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) {
- for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
- mediaType = mediaType.getBaseType();
- addSupportedMimeType(mediaType.toString());
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
- List extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.emptyList());
- for (String mediaTypeStr : extras) {
- MediaType mediaType = parseMediaType(mediaTypeStr);
- addSupportedMimeType(mediaTypeStr);
- this.mediaTypeToParserMap.put(mediaType, parser);
- }
- }
- }
- //LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap);
-
- Map tmp = new HashMap<>();
- for (Map.Entry> entry : cellParams.asMap().entrySet()) {
- tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()]));
- }
- this.solrParams = new MultiMapSolrParams(tmp);
- validateArguments();
- }
-
- @Override
- protected boolean doProcess(Record record, InputStream inputStream) {
- Parser parser = detectParser(record);
- if (parser == null) {
- return false;
- }
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(Locale.class, locale);
-
- Metadata metadata = new Metadata();
- for (Entry entry : record.getFields().entries()) {
- metadata.add(entry.getKey(), entry.getValue().toString());
- }
-
- SolrContentHandler handler = solrContentHandlerFactory.createSolrContentHandler(metadata, solrParams, schema);
- try {
- inputStream = TikaInputStream.get(inputStream);
-
- ContentHandler parsingHandler = handler;
-
- // String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()";
- if (xpathExpr != null) {
- Matcher matcher = PARSER.parse(xpathExpr);
- parsingHandler = new MatchingContentHandler(parsingHandler, matcher);
- }
-
- try {
- parser.parse(inputStream, parsingHandler, metadata, parseContext);
- } catch (IOException | TikaException | SAXException e) {
- throw new MorphlineRuntimeException("Cannot parse", e);
- }
- } finally {
- if (inputStream != null) {
- Closeables.closeQuietly(inputStream);
- }
- }
-
- SolrInputDocument doc = handler.newDocument();
- LOG.debug("solr doc: {}", doc);
- Record outputRecord = toRecord(doc);
- return getChild().process(outputRecord);
- }
-
- private Parser detectParser(Record record) {
- if (!hasAtLeastOneMimeType(record)) {
- return null;
- }
- String mediaTypeStr = (String) record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE);
- assert mediaTypeStr != null;
-
- MediaType mediaType = parseMediaType(mediaTypeStr).getBaseType();
- Parser parser = mediaTypeToParserMap.get(mediaType); // fast path
- if (parser != null) {
- return parser;
- }
- // wildcard matching
- for (Map.Entry entry : mediaTypeToParserMap.entrySet()) {
- if (isMediaTypeMatch(mediaType, entry.getKey())) {
- return entry.getValue();
- }
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("No supported MIME type parser found for " + Fields.ATTACHMENT_MIME_TYPE + "=" + mediaTypeStr);
- }
- return null;
- }
-
- private boolean hasAtLeastOneMimeType(Record record) {
- if (!record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE)) {
- LOG.debug("Command failed because of missing MIME type for record: {}", record);
- return false;
- }
- return true;
- }
-
- private MediaType parseMediaType(String mediaTypeStr) {
- MediaType mediaType = MediaType.parse(mediaTypeStr.trim().toLowerCase(Locale.ROOT));
- return mediaType.getBaseType();
- };
-
- /** Returns true if mediaType falls withing the given range (pattern), false otherwise */
- private boolean isMediaTypeMatch(MediaType mediaType, MediaType rangePattern) {
- String WILDCARD = "*";
- String rangePatternType = rangePattern.getType();
- String rangePatternSubtype = rangePattern.getSubtype();
- return (rangePatternType.equals(WILDCARD) || rangePatternType.equals(mediaType.getType()))
- && (rangePatternSubtype.equals(WILDCARD) || rangePatternSubtype.equals(mediaType.getSubtype()));
- }
-
- private static SolrContentHandlerFactory getSolrContentHandlerFactory(
- Class extends SolrContentHandlerFactory> factoryClass, Collection dateFormats, Config config) {
- try {
- return factoryClass.getConstructor(Collection.class).newInstance(dateFormats);
- } catch (NoSuchMethodException nsme) {
- throw new MorphlineCompilationException("Unable to find valid constructor of type "
- + factoryClass.getName() + " for creating SolrContentHandler", config, nsme);
- } catch (Exception e) {
- throw new MorphlineCompilationException("Unexpected exception when trying to create SolrContentHandlerFactory of type "
- + factoryClass.getName(), config, e);
- }
- }
-
- private Record toRecord(SolrInputDocument doc) {
- Record record = new Record();
- for (Entry entry : doc.entrySet()) {
- record.getFields().putAll(entry.getKey(), entry.getValue().getValues());
- }
- return record;
- }
-
- @SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
- private Locale getLocale(String name) {
- if (name == null) {
- return Locale.ROOT;
- }
- for (Locale locale : Locale.getAvailableLocales()) {
- if (locale.toString().equals(name)) {
- return locale;
- }
- }
- try {
- return new Locale.Builder().setLanguageTag(name).build();
- } catch (IllformedLocaleException ex) {
- throw new MorphlineCompilationException("Malformed / non-existent locale: " + name, getConfig(), ex);
- }
- }
- }
-
-}
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
deleted file mode 100644
index 81f49afd4e5..00000000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.util.Collection;
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.tika.metadata.Metadata;
-
-/**
- * {@link SolrContentHandler} and associated factory that strips non-characters and trims on output.
- * This prevents exceptions on parsing integer fields inside Solr server.
- */
-public class StripNonCharSolrContentHandlerFactory extends SolrContentHandlerFactory {
-
- public StripNonCharSolrContentHandlerFactory(Collection dateFormats) {
- super(dateFormats);
- }
-
- @Override
- public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
- return new StripNonCharSolrContentHandler(metadata, params, schema, dateFormats);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class StripNonCharSolrContentHandler extends SolrContentHandler {
-
- public StripNonCharSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) {
- super(metadata, params, schema, dateFormats);
- }
-
- /**
- * Strip all non-characters, which can cause SolrReducer problems if present.
- * This is borrowed from Apache Nutch.
- */
- private static String stripNonCharCodepoints(String input) {
- StringBuilder stripped = new StringBuilder(input.length());
- char ch;
- for (int i = 0; i < input.length(); i++) {
- ch = input.charAt(i);
- // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
- // and non-printable control characters except tabulator, new line and carriage return
- if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
- ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
- (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
- (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
- stripped.append(ch);
- }
- }
- return stripped.toString();
- }
-
- @Override
- protected String transformValue(String val, SchemaField schemaField) {
- String ret = super.transformValue(val, schemaField).trim();
- ret = stripNonCharCodepoints(ret);
- return ret;
- }
- }
-}
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
deleted file mode 100644
index 6e7df593ff8..00000000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.util.Collection;
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.tika.metadata.Metadata;
-
-/**
- * {@link SolrContentHandler} and associated factory that trims field values on output.
- * This prevents exceptions on parsing integer fields inside Solr server.
- */
-public class TrimSolrContentHandlerFactory extends SolrContentHandlerFactory {
-
- public TrimSolrContentHandlerFactory(Collection dateFormats) {
- super(dateFormats);
- }
-
- @Override
- public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
- return new TrimSolrContentHandler(metadata, params, schema, dateFormats);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class TrimSolrContentHandler extends SolrContentHandler {
-
- public TrimSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) {
- super(metadata, params, schema, dateFormats);
- }
-
- @Override
- protected String transformValue(String val, SchemaField schemaField) {
- return super.transformValue(val, schemaField).trim();
- }
- }
-}
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java
deleted file mode 100644
index 0f44a70488d..00000000000
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Morphlines Solr Cell related code.
- */
-package org.apache.solr.morphlines.cell;
-
-
-
-
diff --git a/solr/contrib/morphlines-cell/src/java/overview.html b/solr/contrib/morphlines-cell/src/java/overview.html
deleted file mode 100644
index 3e25367d302..00000000000
--- a/solr/contrib/morphlines-cell/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-Apache Solr Search Server: Solr Cell Morphline Commands
-
-
diff --git a/solr/contrib/morphlines-cell/src/test-files/README.txt b/solr/contrib/morphlines-cell/src/test-files/README.txt
deleted file mode 100644
index 8905df29d30..00000000000
--- a/solr/contrib/morphlines-cell/src/test-files/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-The test-files by this module are located in the morphlines-core module.
diff --git a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
deleted file mode 100644
index e0872b609ed..00000000000
--- a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.cell;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.lucene.util.Constants;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.handler.extraction.ExtractionDateUtil;
-import org.apache.solr.handler.extraction.SolrContentHandler;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.tika.metadata.Metadata;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
-
- private Map expectedRecords = new HashMap<>();
- private Map> expectedRecordContents = new HashMap<>();
-
- @BeforeClass
- public static void beforeClass2() {
- assumeFalse("FIXME: Morphlines currently has issues with Windows paths", Constants.WINDOWS);
- }
-
- @Before
- public void setUp() throws Exception {
- super.setUp();
-
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- expectedRecords.put(path + "sample-statuses-20120906-141433.avro", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433.gz", 2);
- expectedRecords.put(path + "sample-statuses-20120906-141433.bz2", 2);
- expectedRecords.put(path + "cars.csv", 6);
- expectedRecords.put(path + "cars.csv.gz", 6);
- expectedRecords.put(path + "cars.tar.gz", 4);
- expectedRecords.put(path + "cars.tsv", 6);
- expectedRecords.put(path + "cars.ssv", 6);
- expectedRecords.put(path + "test-documents.7z", 9);
- expectedRecords.put(path + "test-documents.cpio", 9);
- expectedRecords.put(path + "test-documents.tar", 9);
- expectedRecords.put(path + "test-documents.tbz2", 9);
- expectedRecords.put(path + "test-documents.tgz", 9);
- expectedRecords.put(path + "test-documents.zip", 9);
- expectedRecords.put(path + "multiline-stacktrace.log", 4);
-
- {
- Map record = new LinkedHashMap();
- record.put("ignored__attachment_mimetype", "image/jpeg");
- record.put("ignored_exif_isospeedratings", "400");
- record.put("ignored_meta_creation_date", "2009-08-11T09:09:45");
- record.put("ignored_tiff_model", "Canon EOS 40D");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put("/testJPEG_EXIF.jpg", record);
- expectedRecordContents.put("/testJPEG_EXIF.jpg.tar", record);
- expectedRecordContents.put("/testJPEG_EXIF.jpg.tar.gz", record);
- }
-
- {
- String file = path + "testWORD_various.doc";
- Map record = new LinkedHashMap();
- record.put("ignored__attachment_mimetype", "application/msword");
- record.put("ignored_author", "Michael McCandless");
- record.put("ignored_creation_date", "2011-09-02T10:11:00Z");
- record.put("ignored_title", "");
- record.put("ignored_keywords", "Keyword1 Keyword2");
- record.put("ignored_subject", "Subject is here");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "testPDF.pdf";
- Map record = new LinkedHashMap();
- record.put("ignored__attachment_mimetype", "application/pdf");
- record.put("ignored_author", "Bertrand Delacrétaz");
- record.put("ignored_creation_date", "2007-09-15T09:02:31Z");
- record.put("ignored_title", "Apache Tika - Apache Tika");
- record.put("ignored_xmp_creatortool", "Firefox");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "email.eml";
- Map record = new LinkedHashMap();
- String name = "Patrick Foo ";
- record.put("ignored__attachment_mimetype", "message/rfc822");
- record.put("ignored_author", name);
- //record.put("ignored_content_length", "1068");
- record.put("ignored_creation_date", "2013-11-27T20:01:23Z");
- record.put("ignored_message_from", name);
- record.put("ignored_message_to", name);
- record.put("ignored_creator", name);
- record.put("ignored_dc_creator", name);
- record.put("ignored_dc_title", "Test EML");
- record.put("ignored_dcterms_created", "2013-11-27T20:01:23Z");
- record.put("ignored_meta_author", name);
- record.put("ignored_meta_creation_date", "2013-11-27T20:01:23Z");
- record.put("ignored_subject", "Test EML");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- {
- String file = path + "testEXCEL.xlsx";
- Map record = new LinkedHashMap();
- record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- record.put("ignored_author", "Keith Bennett");
- record.put("ignored_creation_date", "2007-10-01T16:13:56Z");
- record.put("ignored_title", "Simple Excel document");
- record.put("text", NON_EMPTY_FIELD);
- expectedRecordContents.put(file, record);
- }
-
- FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
- }
-
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-6489")
- public void testSolrCellJPGCompressed() throws Exception {
- morphline = createMorphline("test-morphlines" + File.separator + "solrCellJPGCompressed");
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] files = new String[] {
- path + "testJPEG_EXIF.jpg",
- path + "testJPEG_EXIF.jpg.gz",
- path + "testJPEG_EXIF.jpg.tar.gz",
- //path + "jpeg2000.jp2",
- };
- testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
- }
-
- @Test
- public void testSolrCellXML() throws Exception {
- morphline = createMorphline("test-morphlines" + File.separator + "solrCellXML");
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] files = new String[] {
- path + "testXML2.xml",
- };
- testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
- }
-
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-6489")
- public void testSolrCellDocumentTypes() throws Exception {
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
-
- morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] files = new String[] {
- path + "testBMPfp.txt",
- path + "boilerplate.html",
- path + "NullHeader.docx",
- path + "testWORD_various.doc",
- path + "testPDF.pdf",
- path + "testJPEG_EXIF.jpg",
- path + "testJPEG_EXIF.jpg.gz",
- path + "testJPEG_EXIF.jpg.tar.gz",
- path + "testXML.xml",
- path + "cars.csv",
-// path + "cars.tsv",
-// path + "cars.ssv",
- path + "cars.csv.gz",
- path + "cars.tar.gz",
- path + "sample-statuses-20120906-141433.avro",
- path + "sample-statuses-20120906-141433",
- path + "sample-statuses-20120906-141433.gz",
- path + "sample-statuses-20120906-141433.bz2",
- path + "email.eml",
- };
- testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
- }
-
- @Test
- @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9220")
- public void testSolrCellDocumentTypes2() throws Exception {
-
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
-
- morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
- String path = RESOURCES_DIR + File.separator + "test-documents" + File.separator;
- String[] files = new String[] {
- path + "testPPT_various.ppt",
- path + "testPPT_various.pptx",
- path + "testEXCEL.xlsx",
- path + "testEXCEL.xls",
- path + "testPages.pages",
- //path + "testNumbers.numbers",
- //path + "testKeynote.key",
-
- path + "testRTFVarious.rtf",
- path + "complex.mbox",
- path + "test-outlook.msg",
- path + "testEMLX.emlx",
- path + "testRFC822",
- path + "rsstest.rss",
-// path + "testDITA.dita",
-
- path + "testMP3i18n.mp3",
- path + "testAIFF.aif",
- path + "testFLAC.flac",
-// path + "testFLAC.oga",
-// path + "testVORBIS.ogg",
- path + "testMP4.m4a",
- path + "testWAV.wav",
-// path + "testWMA.wma",
-
- path + "testFLV.flv",
-// path + "testWMV.wmv",
-
- path + "testBMP.bmp",
- path + "testPNG.png",
- path + "testPSD.psd",
- path + "testSVG.svg",
- path + "testTIFF.tif",
-
-// path + "test-documents.7z",
-// path + "test-documents.cpio",
-// path + "test-documents.tar",
-// path + "test-documents.tbz2",
-// path + "test-documents.tgz",
-// path + "test-documents.zip",
-// path + "test-zip-of-zip.zip",
-// path + "testJAR.jar",
-
-// path + "testKML.kml",
-// path + "testRDF.rdf",
- path + "testVISIO.vsd",
-// path + "testWAR.war",
-// path + "testWindows-x86-32.exe",
-// path + "testWINMAIL.dat",
-// path + "testWMF.wmf",
- };
- testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
- }
-
- /**
- * Test that the ContentHandler properly strips the illegal characters
- */
- @Test
- public void testTransformValue() {
- String fieldName = "user_name";
- assertFalse("foobar".equals(getFoobarWithNonChars()));
-
- Metadata metadata = new Metadata();
- // load illegal char string into a metadata field and generate a new document,
- // which will cause the ContentHandler to be invoked.
- metadata.set(fieldName, getFoobarWithNonChars());
- StripNonCharSolrContentHandlerFactory contentHandlerFactory =
- new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
- IndexSchema schema = h.getCore().getLatestSchema();
- SolrContentHandler contentHandler =
- contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema);
- SolrInputDocument doc = contentHandler.newDocument();
- String foobar = doc.getFieldValue(fieldName).toString();
- assertTrue("foobar".equals(foobar));
- }
-
- /**
- * Returns string "foobar" with illegal characters interspersed.
- */
- private String getFoobarWithNonChars() {
- char illegalChar = '\uffff';
- StringBuilder builder = new StringBuilder();
- builder.append(illegalChar).append(illegalChar).append("foo").append(illegalChar)
- .append(illegalChar).append("bar").append(illegalChar).append(illegalChar);
- return builder.toString();
- }
-
-}
diff --git a/solr/contrib/morphlines-core/README.txt b/solr/contrib/morphlines-core/README.txt
deleted file mode 100644
index 0efa4674258..00000000000
--- a/solr/contrib/morphlines-core/README.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Solr Morphlines-Core
-
-*Experimental* - This contrib is currently subject to change in ways that may
-break back compatibility.
-
-This contrib provides a variety of Kite Morphlines features for Solr.
\ No newline at end of file
diff --git a/solr/contrib/morphlines-core/build.xml b/solr/contrib/morphlines-core/build.xml
deleted file mode 100644
index 2cf6261669b..00000000000
--- a/solr/contrib/morphlines-core/build.xml
+++ /dev/null
@@ -1,105 +0,0 @@
-
-
-
-
-
-
-
- Solr Morphlines commands.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/ivy.xml b/solr/contrib/morphlines-core/ivy.xml
deleted file mode 100644
index ad47aec4015..00000000000
--- a/solr/contrib/morphlines-core/ivy.xml
+++ /dev/null
@@ -1,128 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
deleted file mode 100644
index f3030247065..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.IOException;
-
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * A vehicle to load a list of Solr documents into some kind of destination,
- * such as a SolrServer or MapReduce RecordWriter.
- */
-public interface DocumentLoader {
-
- /** Begins a transaction */
- public void beginTransaction() throws IOException, SolrServerException;
-
- /** Loads the given document into the destination */
- public void load(SolrInputDocument doc) throws IOException, SolrServerException;
-
- /**
- * Sends any outstanding documents to the destination and waits for a positive
- * or negative ack (i.e. exception). Depending on the outcome the caller
- * should then commit or rollback the current flume transaction
- * correspondingly.
- *
- * @throws IOException
- * If there is a low-level I/O error.
- */
- public void commitTransaction() throws IOException, SolrServerException;
-
- /**
- * Performs a rollback of all non-committed documents pending.
- *
- * Note that this is not a true rollback as in databases. Content you have
- * previously added may have already been committed due to autoCommit, buffer
- * full, other client performing a commit etc. So this is only a best-effort
- * rollback.
- *
- * @throws IOException
- * If there is a low-level I/O error.
- */
- public UpdateResponse rollbackTransaction() throws IOException, SolrServerException;
-
- /** Releases allocated resources */
- public void shutdown() throws IOException, SolrServerException;
-
- /**
- * Issues a ping request to check if the server is alive
- *
- * @throws IOException
- * If there is a low-level I/O error.
- */
- public SolrPingResponse ping() throws IOException, SolrServerException;
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/FileUtils.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/FileUtils.java
deleted file mode 100644
index e979d37bbc7..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/FileUtils.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.nio.file.Files;
-
-
-class FileUtils {
-
- //-----------------------------------------------------------------------
- /**
- * Deletes a directory recursively.
- *
- * @param directory directory to delete
- * @throws IOException in case deletion is unsuccessful
- */
- public static void deleteDirectory(File directory) throws IOException {
- if (!directory.exists()) {
- return;
- }
-
- if (!isSymlink(directory)) {
- cleanDirectory(directory);
- }
-
- Files.delete(directory.toPath());
- }
-
- /**
- * Determines whether the specified file is a Symbolic Link rather than an actual file.
- *
- * Will not return true if there is a Symbolic Link anywhere in the path,
- * only if the specific file is.
- *
- * @param file the file to check
- * @return true if the file is a Symbolic Link
- * @throws IOException if an IO error occurs while checking the file
- * @since Commons IO 2.0
- */
- public static boolean isSymlink(File file) throws IOException {
- if (file == null) {
- throw new NullPointerException("File must not be null");
- }
-// if (FilenameUtils.isSystemWindows()) {
- if (File.separatorChar == '\\') {
- return false;
- }
- File fileInCanonicalDir = null;
- if (file.getParent() == null) {
- fileInCanonicalDir = file;
- } else {
- File canonicalDir = file.getParentFile().getCanonicalFile();
- fileInCanonicalDir = new File(canonicalDir, file.getName());
- }
-
- if (fileInCanonicalDir.getCanonicalFile().equals(fileInCanonicalDir.getAbsoluteFile())) {
- return false;
- } else {
- return true;
- }
- }
-
- /**
- * Cleans a directory without deleting it.
- *
- * @param directory directory to clean
- * @throws IOException in case cleaning is unsuccessful
- */
- public static void cleanDirectory(File directory) throws IOException {
- if (!directory.exists()) {
- String message = directory + " does not exist";
- throw new IllegalArgumentException(message);
- }
-
- if (!directory.isDirectory()) {
- String message = directory + " is not a directory";
- throw new IllegalArgumentException(message);
- }
-
- File[] files = directory.listFiles();
- if (files == null) { // null if security restricted
- throw new IOException("Failed to list contents of " + directory);
- }
-
- IOException exception = null;
- for (File file : files) {
- try {
- forceDelete(file);
- } catch (IOException ioe) {
- exception = ioe;
- }
- }
-
- if (null != exception) {
- throw exception;
- }
- }
-
- //-----------------------------------------------------------------------
- /**
- * Deletes a file. If file is a directory, delete it and all sub-directories.
- *
- * The difference between File.delete() and this method are:
- *
- * A directory to be deleted does not have to be empty.
- * You get exceptions when a file or directory cannot be deleted.
- * (java.io.File methods returns a boolean)
- *
- *
- * @param file file or directory to delete, must not be null
- * @throws NullPointerException if the directory is null
- * @throws FileNotFoundException if the file was not found
- * @throws IOException in case deletion is unsuccessful
- */
- public static void forceDelete(File file) throws IOException {
- if (file.isDirectory()) {
- deleteDirectory(file);
- } else {
- Files.delete(file.toPath());
- }
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java
deleted file mode 100644
index be002efe8f1..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.security.SecureRandom;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Random;
-
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.AbstractCommand;
-import org.kitesdk.morphline.base.Fields;
-import org.kitesdk.morphline.base.Notifications;
-
-import com.typesafe.config.Config;
-
-/**
- * A command that assigns a record unique key that is the concatenation of the given
- * baseIdField
record field, followed by a running count of the record number within
- * the current session. The count is reset to zero whenever a "startSession" notification is
- * received.
- *
- * For example, assume a CSV file containing multiple records but no unique ids, and the
- * baseIdField
field is the filesystem path of the file. Now this command can be used
- * to assign the following record values to Solr's unique key field:
- * $path#0, $path#1, ... $path#N
.
- *
- * The name of the unique key field is fetched from Solr's schema.xml file, as directed by the
- * solrLocator
configuration parameter.
- */
-public final class GenerateSolrSequenceKeyBuilder implements CommandBuilder {
-
- @Override
- public Collection getNames() {
- return Arrays.asList(
- "generateSolrSequenceKey",
- "sanitizeUniqueSolrKey" // old name (retained for backwards compatibility)
- );
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new GenerateSolrSequenceKey(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class GenerateSolrSequenceKey extends AbstractCommand {
-
- private final boolean preserveExisting;
- private final String baseIdFieldName;
- private final String uniqueKeyName;
- private long recordCounter = 0;
-
- private final String idPrefix; // for load testing only; enables adding same document many times with a different unique key
- private final Random randomIdPrefix; // for load testing only; enables adding same document many times with a different unique key
-
- public GenerateSolrSequenceKey(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
- this.baseIdFieldName = getConfigs().getString(config, "baseIdField", Fields.BASE_ID);
- this.preserveExisting = getConfigs().getBoolean(config, "preserveExisting", true);
-
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- IndexSchema schema = locator.getIndexSchema();
- SchemaField uniqueKey = schema.getUniqueKeyField();
- uniqueKeyName = uniqueKey == null ? null : uniqueKey.getName();
-
- String tmpIdPrefix = getConfigs().getString(config, "idPrefix", null); // for load testing only
- Random tmpRandomIdPrefx = null;
- if ("random".equals(tmpIdPrefix)) { // for load testing only
- tmpRandomIdPrefx = new Random(new SecureRandom().nextLong());
- tmpIdPrefix = null;
- }
- idPrefix = tmpIdPrefix;
- randomIdPrefix = tmpRandomIdPrefx;
- validateArguments();
- }
-
- @Override
- protected boolean doProcess(Record doc) {
- long num = recordCounter++;
- // LOG.debug("record #{} id before sanitizing doc: {}", num, doc);
- if (uniqueKeyName == null || (preserveExisting && doc.getFields().containsKey(uniqueKeyName))) {
- ; // we must preserve the existing id
- } else {
- Object baseId = doc.getFirstValue(baseIdFieldName);
- if (baseId == null) {
- throw new MorphlineRuntimeException("Record field " + baseIdFieldName
- + " must not be null as it is needed as a basis for a unique key for solr doc: " + doc);
- }
- doc.replaceValues(uniqueKeyName, baseId.toString() + "#" + num);
- }
-
- // for load testing only; enables adding same document many times with a different unique key
- if (idPrefix != null) {
- String id = doc.getFirstValue(uniqueKeyName).toString();
- id = idPrefix + id;
- doc.replaceValues(uniqueKeyName, id);
- } else if (randomIdPrefix != null) {
- String id = doc.getFirstValue(uniqueKeyName).toString();
- id = String.valueOf(Math.abs(randomIdPrefix.nextInt())) + "#" + id;
- doc.replaceValues(uniqueKeyName, id);
- }
-
- LOG.debug("record #{} unique key sanitized to this: {}", num, doc);
-
- return super.doProcess(doc);
- }
-
- @Override
- protected void doNotify(Record notification) {
- if (Notifications.containsLifecycleEvent(notification, Notifications.LifecycleEvent.START_SESSION)) {
- recordCounter = 0; // reset
- }
- super.doNotify(notification);
- }
-
- }
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java
deleted file mode 100644
index a3af6e18c5f..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.common.SolrInputDocument;
-
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.AbstractCommand;
-import org.kitesdk.morphline.base.Configs;
-import org.kitesdk.morphline.base.Metrics;
-import org.kitesdk.morphline.base.Notifications;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.codahale.metrics.Timer;
-import com.typesafe.config.Config;
-import com.typesafe.config.ConfigFactory;
-
-/**
- * A command that loads a record into a SolrServer or MapReduce SolrOutputFormat.
- */
-public final class LoadSolrBuilder implements CommandBuilder {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
-
- @Override
- public Collection getNames() {
- return Collections.singletonList("loadSolr");
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new LoadSolr(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class LoadSolr extends AbstractCommand {
-
- private final DocumentLoader loader;
- private final Timer elapsedTime;
-
- public LoadSolr(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- this.loader = locator.getLoader();
- Config boostsConfig = getConfigs().getConfig(config, "boosts", ConfigFactory.empty());
- if (new Configs().getEntrySet(boostsConfig).isEmpty() == false) {
- String message = "Ignoring field boosts: as index-time boosts are not supported anymore";
- if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
- log.warn(message);
- } else {
- log.debug(message);
- }
- }
- validateArguments();
- this.elapsedTime = getTimer(Metrics.ELAPSED_TIME);
- }
-
- @Override
- protected void doNotify(Record notification) {
- for (Object event : Notifications.getLifecycleEvents(notification)) {
- if (event == Notifications.LifecycleEvent.BEGIN_TRANSACTION) {
- try {
- loader.beginTransaction();
- } catch (SolrServerException | IOException e) {
- throw new MorphlineRuntimeException(e);
- }
- } else if (event == Notifications.LifecycleEvent.COMMIT_TRANSACTION) {
- try {
- loader.commitTransaction();
- } catch (SolrServerException | IOException e) {
- throw new MorphlineRuntimeException(e);
- }
- }
- else if (event == Notifications.LifecycleEvent.ROLLBACK_TRANSACTION) {
- try {
- loader.rollbackTransaction();
- } catch (SolrServerException | IOException e) {
- throw new MorphlineRuntimeException(e);
- }
- }
- else if (event == Notifications.LifecycleEvent.SHUTDOWN) {
- try {
- loader.shutdown();
- } catch (SolrServerException | IOException e) {
- throw new MorphlineRuntimeException(e);
- }
- }
- }
- super.doNotify(notification);
- }
-
- @Override
- protected boolean doProcess(Record record) {
- Timer.Context timerContext = elapsedTime.time();
- SolrInputDocument doc = convert(record);
- try {
- loader.load(doc);
- } catch (IOException | SolrServerException e) {
- throw new MorphlineRuntimeException(e);
- } finally {
- timerContext.stop();
- }
-
- // pass record to next command in chain:
- return super.doProcess(record);
- }
-
- private SolrInputDocument convert(Record record) {
- Map> map = record.getFields().asMap();
- SolrInputDocument doc = new SolrInputDocument(new HashMap(2 * map.size()));
- for (Map.Entry> entry : map.entrySet()) {
- String key = entry.getKey();
- doc.setField(key, entry.getValue());
- }
- return doc;
- }
-
- }
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrClient.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrClient.java
deleted file mode 100644
index a5fb929c92d..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrClient.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.lang.invoke.MethodHandles;
-
-import org.apache.http.client.HttpClient;
-import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * ConcurrentUpdateSolrServer that propagates exceptions up to the submitter of
- * requests on blockUntilFinished()
- */
-final class SafeConcurrentUpdateSolrClient extends ConcurrentUpdateSolrClient {
-
- private Throwable currentException = null;
- private final Object myLock = new Object();
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public SafeConcurrentUpdateSolrClient(String solrServerUrl, int queueSize, int threadCount) {
- this(solrServerUrl, null, queueSize, threadCount);
- }
-
- public SafeConcurrentUpdateSolrClient(String solrServerUrl, HttpClient client, int queueSize, int threadCount) {
- super(solrServerUrl, client, queueSize, threadCount, null, false);
- }
-
- @Override
- public void handleError(Throwable ex) {
- assert ex != null;
- synchronized (myLock) {
- currentException = ex;
- }
- LOGGER.error("handleError", ex);
- }
-
- @Override
- public void blockUntilFinished() {
- super.blockUntilFinished();
- synchronized (myLock) {
- if (currentException != null) {
- throw new RuntimeException(currentException);
- }
- }
- }
-
- public void clearException() {
- synchronized (myLock) {
- currentException = null;
- }
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java
deleted file mode 100644
index 9ede7145b40..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Objects;
-import java.util.stream.Collectors;
-
-import org.apache.solr.schema.IndexSchema;
-
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.AbstractCommand;
-import com.typesafe.config.Config;
-
-/**
- * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them
- * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with
- * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields).
- *
- * Recall that Solr throws an exception on any attempt to load a document that contains a field that
- * isn't specified in schema.xml.
- */
-public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder {
-
- @Override
- public Collection getNames() {
- return Collections.singletonList("sanitizeUnknownSolrFields");
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new SanitizeUnknownSolrFields(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class SanitizeUnknownSolrFields extends AbstractCommand {
-
- private final IndexSchema schema;
- private final String renameToPrefix;
-
- public SanitizeUnknownSolrFields(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
-
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- this.schema = Objects.requireNonNull(locator.getIndexSchema());
- if (LOG.isTraceEnabled()) {
- LOG.trace("Solr schema: \n" +
- schema.getFields().entrySet().stream().sorted(Map.Entry.comparingByKey())
- .map(Map.Entry::getValue).map(Object::toString).collect(Collectors.joining("\n"))
- );
- }
-
- String str = getConfigs().getString(config, "renameToPrefix", "").trim();
- this.renameToPrefix = str.length() > 0 ? str : null;
- validateArguments();
- }
-
- @Override
- protected boolean doProcess(Record record) {
- Collection entries = new ArrayList(record.getFields().asMap().entrySet());
- for (Map.Entry> entry : entries) {
- String key = entry.getKey();
- if (schema.getFieldOrNull(key) == null) {
- LOG.debug("Sanitizing unknown Solr field: {}", key);
- Collection values = entry.getValue();
- if (renameToPrefix != null) {
- record.getFields().putAll(renameToPrefix + key, values);
- }
- values.clear(); // implicitly removes key from record
- }
- }
- return super.doProcess(record);
- }
-
- }
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrClientDocumentLoader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrClientDocumentLoader.java
deleted file mode 100644
index ef9ea8424bd..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrClientDocumentLoader.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A vehicle to load a list of Solr documents into a local or remote {@link org.apache.solr.client.solrj.SolrClient}.
- */
-public class SolrClientDocumentLoader implements DocumentLoader {
-
- private final SolrClient client; // proxy to local or remote solr server
- private long numLoadedDocs = 0; // number of documents loaded in the current transaction
- private final int batchSize;
- private final List batch = new ArrayList();
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public SolrClientDocumentLoader(SolrClient client, int batchSize) {
- if (client == null) {
- throw new IllegalArgumentException("solr server must not be null");
- }
- this.client = client;
- if (batchSize <= 0) {
- throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize);
- }
- this.batchSize = batchSize;
- }
-
- @Override
- public void beginTransaction() {
- LOGGER.trace("beginTransaction");
- batch.clear();
- numLoadedDocs = 0;
- if (client instanceof SafeConcurrentUpdateSolrClient) {
- ((SafeConcurrentUpdateSolrClient) client).clearException();
- }
- }
-
- @Override
- public void load(SolrInputDocument doc) throws IOException, SolrServerException {
- LOGGER.trace("load doc: {}", doc);
- batch.add(doc);
- if (batch.size() >= batchSize) {
- loadBatch();
- }
- }
-
- @Override
- public void commitTransaction() throws SolrServerException, IOException {
- LOGGER.trace("commitTransaction");
- if (batch.size() > 0) {
- loadBatch();
- }
- if (numLoadedDocs > 0) {
- if (client instanceof ConcurrentUpdateSolrClient) {
- ((ConcurrentUpdateSolrClient) client).blockUntilFinished();
- }
- }
- }
-
- private void loadBatch() throws SolrServerException, IOException {
- numLoadedDocs += batch.size();
- try {
- UpdateResponse rsp = client.add(batch);
- } finally {
- batch.clear();
- }
- }
-
- @Override
- public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
- LOGGER.trace("rollback");
- if (!(client instanceof CloudSolrClient)) {
- return client.rollback();
- } else {
- return new UpdateResponse();
- }
- }
-
- @Override
- public void shutdown() throws IOException {
- LOGGER.trace("shutdown");
- client.close();
- }
-
- @Override
- public SolrPingResponse ping() throws SolrServerException, IOException {
- LOGGER.trace("ping");
- return client.ping();
- }
-
- public SolrClient getSolrClient() {
- return client;
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
deleted file mode 100644
index 1d177a6778c..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.File;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.nio.file.Paths;
-import java.util.Objects;
-
-import com.google.common.io.Files;
-import com.typesafe.config.Config;
-import com.typesafe.config.ConfigFactory;
-import com.typesafe.config.ConfigRenderOptions;
-import com.typesafe.config.ConfigUtil;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.core.SolrConfig;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.util.SystemIdResolver;
-import org.apache.zookeeper.KeeperException;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.base.Configs;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-/**
- * Set of configuration parameters that identify the location and schema of a Solr server or
- * SolrCloud; Based on this information this class can return the schema and a corresponding
- * {@link DocumentLoader}.
- */
-public class SolrLocator {
-
- private Config config;
- private MorphlineContext context;
- private String collectionName;
- private String zkHost;
- private String solrUrl;
- private String solrHomeDir;
- private int batchSize = 1000;
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- protected SolrLocator(MorphlineContext context) {
- this.context = Objects.requireNonNull(context);
- }
-
- public SolrLocator(Config config, MorphlineContext context) {
- this(context);
- this.config = config;
- Configs configs = new Configs();
- collectionName = configs.getString(config, "collection", null);
- zkHost = configs.getString(config, "zkHost", null);
- solrHomeDir = configs.getString(config, "solrHomeDir", null);
- solrUrl = configs.getString(config, "solrUrl", null);
- batchSize = configs.getInt(config, "batchSize", batchSize);
- LOG.trace("Constructed solrLocator: {}", this);
- configs.validateArguments(config);
- }
-
- public DocumentLoader getLoader() {
- if (context instanceof SolrMorphlineContext) {
- DocumentLoader loader = ((SolrMorphlineContext)context).getDocumentLoader();
- if (loader != null) {
- return loader;
- }
- }
-
- if (zkHost != null && zkHost.length() > 0) {
- if (collectionName == null || collectionName.length() == 0) {
- throw new MorphlineCompilationException("Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
- }
- CloudSolrClient cloudSolrClient = new Builder()
- .withZkHost(zkHost)
- .build();
- cloudSolrClient.setDefaultCollection(collectionName);
- cloudSolrClient.connect();
- return new SolrClientDocumentLoader(cloudSolrClient, batchSize);
- } else {
- if (solrUrl == null || solrUrl.length() == 0) {
- throw new MorphlineCompilationException("Missing parameter 'solrUrl'", config);
- }
- int solrServerNumThreads = 2;
- int solrServerQueueLength = solrServerNumThreads;
- SolrClient server = new SafeConcurrentUpdateSolrClient(solrUrl, solrServerQueueLength, solrServerNumThreads);
- // SolrServer server = new HttpSolrServer(solrServerUrl);
- // SolrServer server = new ConcurrentUpdateSolrServer(solrServerUrl, solrServerQueueLength, solrServerNumThreads);
- // server.setParser(new XMLResponseParser()); // binary parser is used by default
- return new SolrClientDocumentLoader(server, batchSize);
- }
- }
-
- public IndexSchema getIndexSchema() {
- if (context instanceof SolrMorphlineContext) {
- IndexSchema schema = ((SolrMorphlineContext)context).getIndexSchema();
- if (schema != null) {
- validateSchema(schema);
- return schema;
- }
- }
-
- File downloadedSolrHomeDir = null;
- try {
- // If solrHomeDir isn't defined and zkHost and collectionName are defined
- // then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
- String mySolrHomeDir = solrHomeDir;
- if (solrHomeDir == null || solrHomeDir.length() == 0) {
- if (zkHost == null || zkHost.length() == 0) {
- // TODO: implement download from solrUrl if specified
- throw new MorphlineCompilationException(
- "Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
- config);
- }
- if (collectionName == null || collectionName.length() == 0) {
- throw new MorphlineCompilationException(
- "Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
- }
- ZooKeeperDownloader zki = new ZooKeeperDownloader();
- SolrZkClient zkClient = zki.getZkClient(zkHost);
- try {
- String configName = zki.readConfigName(zkClient, collectionName);
- downloadedSolrHomeDir = Files.createTempDir();
- downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName, downloadedSolrHomeDir);
- mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
- } catch (KeeperException | InterruptedException | IOException e) {
- throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
- } finally {
- zkClient.close();
- }
- }
-
- LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
- try {
- SolrResourceLoader loader = new SolrResourceLoader(Paths.get(mySolrHomeDir));
- SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
- InputSource is = new InputSource(loader.openSchema("schema.xml"));
- is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
-
- IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
- validateSchema(schema);
- return schema;
- } catch (ParserConfigurationException | IOException | SAXException e) {
- throw new MorphlineRuntimeException(e);
- }
- } finally {
- if (downloadedSolrHomeDir != null) {
- try {
- FileUtils.deleteDirectory(downloadedSolrHomeDir);
- } catch (IOException e) {
- LOG.warn("Cannot delete tmp directory", e);
- }
- }
- }
- }
-
- private void validateSchema(IndexSchema schema) {
- if (schema.getUniqueKeyField() == null) {
- throw new MorphlineCompilationException("Solr schema.xml is missing unique key field", config);
- }
- if (!schema.getUniqueKeyField().isRequired()) {
- throw new MorphlineCompilationException("Solr schema.xml must contain a required unique key field", config);
- }
- }
-
- @Override
- public String toString() {
- return toConfig(null).root().render(ConfigRenderOptions.concise());
- }
-
- public Config toConfig(String key) {
- String json = "";
- if (key != null) {
- json = toJson(key) + " : ";
- }
- json +=
- "{" +
- " collection : " + toJson(collectionName) + ", " +
- " zkHost : " + toJson(zkHost) + ", " +
- " solrUrl : " + toJson(solrUrl) + ", " +
- " solrHomeDir : " + toJson(solrHomeDir) + ", " +
- " batchSize : " + toJson(batchSize) + " " +
- "}";
- return ConfigFactory.parseString(json);
- }
-
- private String toJson(Object key) {
- String str = key == null ? "" : key.toString();
- str = ConfigUtil.quoteString(str);
- return str;
- }
-
- public String getCollectionName() {
- return this.collectionName;
- }
-
- public void setCollectionName(String collectionName) {
- this.collectionName = collectionName;
- }
-
- public String getZkHost() {
- return this.zkHost;
- }
-
- public void setZkHost(String zkHost) {
- this.zkHost = zkHost;
- }
-
- public String getSolrHomeDir() {
- return this.solrHomeDir;
- }
-
- public void setSolrHomeDir(String solrHomeDir) {
- this.solrHomeDir = solrHomeDir;
- }
-
- public String getServerUrl() {
- return this.solrUrl;
- }
-
- public void setServerUrl(String solrUrl) {
- this.solrUrl = solrUrl;
- }
-
- public int getBatchSize() {
- return this.batchSize;
- }
-
- public void setBatchSize(int batchSize) {
- this.batchSize = batchSize;
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java
deleted file mode 100644
index b3cd3019506..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import org.apache.solr.schema.IndexSchema;
-
-import org.kitesdk.morphline.api.MorphlineContext;
-
-/**
- * A context that is specific to Solr.
- */
-public class SolrMorphlineContext extends MorphlineContext {
-
- private DocumentLoader loader;
- private IndexSchema schema;
-
- /** For public access use {@link Builder#build()} instead */
- protected SolrMorphlineContext() {}
-
- public DocumentLoader getDocumentLoader() {
- return loader;
- }
-
- public IndexSchema getIndexSchema() {
- return schema;
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- /**
- * Helper to construct a {@link SolrMorphlineContext} instance.
- */
- public static class Builder extends MorphlineContext.Builder {
-
- private DocumentLoader loader;
- private IndexSchema schema;
-
- public Builder() {}
-
- public Builder setDocumentLoader(DocumentLoader loader) {
- this.loader = loader;
- return this;
- }
-
- public Builder setIndexSchema(IndexSchema schema) {
- this.schema = schema;
- return this;
- }
-
- @Override
- public SolrMorphlineContext build() {
- ((SolrMorphlineContext)context).loader = loader;
- ((SolrMorphlineContext)context).schema = schema;
- return (SolrMorphlineContext) super.build();
- }
-
- @Override
- protected SolrMorphlineContext create() {
- return new SolrMorphlineContext();
- }
-
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java
deleted file mode 100644
index 7c96f3ff176..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.IndexSchema;
-
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.CommandBuilder;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.MorphlineRuntimeException;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.AbstractCommand;
-import com.typesafe.config.Config;
-
-/**
- * A command that uses the embedded Solr/Lucene Analyzer library to generate tokens from a text
- * string, without sending data to a Solr server.
- */
-public final class TokenizeTextBuilder implements CommandBuilder {
-
- @Override
- public Collection getNames() {
- return Collections.singletonList("tokenizeText");
- }
-
- @Override
- public Command build(Config config, Command parent, Command child, MorphlineContext context) {
- return new TokenizeText(this, config, parent, child, context);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private static final class TokenizeText extends AbstractCommand {
-
- private final String inputFieldName;
- private final String outputFieldName;
- private final Analyzer analyzer;
- private final CharTermAttribute token; // cached
- private final ReusableStringReader reader = new ReusableStringReader(); // cached
-
- public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
- super(builder, config, parent, child, context);
- this.inputFieldName = getConfigs().getString(config, "inputField");
- this.outputFieldName = getConfigs().getString(config, "outputField");
- String solrFieldType = getConfigs().getString(config, "solrFieldType");
- Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
- SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
- LOG.debug("solrLocator: {}", locator);
- IndexSchema schema = locator.getIndexSchema();
- FieldType fieldType = schema.getFieldTypeByName(solrFieldType);
- if (fieldType == null) {
- throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config);
- }
- this.analyzer = Objects.requireNonNull(fieldType.getIndexAnalyzer());
- // register CharTermAttribute for later (implicit) reuse
- this.token = Objects.requireNonNull(analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class));
- validateArguments();
- }
-
- @Override
- protected boolean doProcess(Record record) {
- try {
- List outputValues = record.get(outputFieldName);
- for (Object value : record.get(inputFieldName)) {
- reader.setValue(value.toString());
- TokenStream tokenStream = analyzer.tokenStream("content", reader);
- tokenStream.reset();
- while (tokenStream.incrementToken()) {
- if (token.length() > 0) { // incrementToken() updates the token!
- String tokenStr = new String(token.buffer(), 0, token.length());
- outputValues.add(tokenStr);
- }
- }
- tokenStream.end();
- tokenStream.close();
- }
- } catch (IOException e) {
- throw new MorphlineRuntimeException(e);
- }
-
- // pass record to next command in chain:
- return super.doProcess(record);
- }
-
- }
-
- private static final class ReusableStringReader extends Reader {
- private int pos = 0, size = 0;
- private String s = null;
-
- void setValue(String s) {
- this.s = s;
- this.size = s.length();
- this.pos = 0;
- }
-
- @Override
- public int read() {
- if (pos < size) {
- return s.charAt(pos++);
- } else {
- s = null;
- return -1;
- }
- }
-
- @Override
- public int read(char[] c, int off, int len) {
- if (pos < size) {
- len = Math.min(len, size-pos);
- s.getChars(pos, pos+len, c, off);
- pos += len;
- return len;
- } else {
- s = null;
- return -1;
- }
- }
-
- @Override
- public void close() {
- pos = size; // this prevents NPE when reading after close!
- s = null;
- }
- }
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java
deleted file mode 100644
index 7185531d807..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.morphlines.solr;
-
-import java.io.File;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.List;
-
-import org.apache.solr.cloud.ZkController;
-import org.apache.solr.common.cloud.Aliases;
-import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkConfigManager;
-import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkStateReader;
-import org.apache.solr.common.util.StrUtils;
-import org.apache.zookeeper.KeeperException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-import com.google.common.io.Files;
-
-/**
- * Downloads SolrCloud information from ZooKeeper.
- */
-final class ZooKeeperDownloader {
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public SolrZkClient getZkClient(String zkHost) {
- if (zkHost == null) {
- throw new IllegalArgumentException("zkHost must not be null");
- }
-
- SolrZkClient zkClient;
- try {
- zkClient = new SolrZkClient(zkHost, 30000);
- } catch (Exception e) {
- throw new IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e);
- }
- return zkClient;
- }
-
- /**
- * Returns config value given collection name
- * Borrowed heavily from Solr's ZKController.
- */
- public String readConfigName(SolrZkClient zkClient, String collection)
- throws KeeperException, InterruptedException {
- if (collection == null) {
- throw new IllegalArgumentException("collection must not be null");
- }
- String configName = null;
-
- // first check for alias
- byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true);
- Aliases aliases = ClusterState.load(aliasData);
- String alias = aliases.getCollectionAlias(collection);
- if (alias != null) {
- List aliasList = StrUtils.splitSmart(alias, ",", true);
- if (aliasList.size() > 1) {
- throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections");
- }
- collection = aliasList.get(0);
- }
-
- String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
- if (LOG.isInfoEnabled()) {
- LOG.info("Load collection config from:" + path);
- }
- byte[] data = zkClient.getData(path, null, null, true);
-
- if(data != null) {
- ZkNodeProps props = ZkNodeProps.load(data);
- configName = props.getStr(ZkController.CONFIGNAME_PROP);
- }
-
- if (configName != null && !zkClient.exists(ZkConfigManager.CONFIGS_ZKNODE + "/" + configName, true)) {
- LOG.error("Specified config does not exist in ZooKeeper:" + configName);
- throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:"
- + configName);
- }
-
- return configName;
- }
-
- /**
- * Download and return the config directory from ZK
- */
- public File downloadConfigDir(SolrZkClient zkClient, String configName, File dir)
- throws IOException, InterruptedException, KeeperException {
- Preconditions.checkArgument(dir.exists());
- Preconditions.checkArgument(dir.isDirectory());
- ZkConfigManager manager = new ZkConfigManager(zkClient);
- manager.downloadConfigDir(configName, dir.toPath());
- File confDir = new File(dir, "conf");
- if (!confDir.isDirectory()) {
- // create a temporary directory with "conf" subdir and mv the config in there. This is
- // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g.
- // conf/solrconfig.xml which is necessary for proper solr operation. This should work
- // even if solrctl changes.
- confDir = new File(Files.createTempDir().getAbsolutePath(), "conf");
- confDir.getParentFile().deleteOnExit();
- Files.move(dir, confDir);
- dir = confDir.getParentFile();
- }
- verifyConfigDir(confDir);
- return dir;
- }
-
- private void verifyConfigDir(File confDir) throws IOException {
- File solrConfigFile = new File(confDir, "solrconfig.xml");
- if (!solrConfigFile.exists()) {
- throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: File not found: "
- + solrConfigFile.getName());
- }
- if (!solrConfigFile.isFile()) {
- throw new IOException("Detected invalid Solr config dir in ZooKeeper - Reason: Not a file: "
- + solrConfigFile.getName());
- }
- if (!solrConfigFile.canRead()) {
- throw new IOException("Insufficient permissions to read file: " + solrConfigFile);
- }
- }
-
-}
diff --git a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package-info.java b/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package-info.java
deleted file mode 100644
index f4b91eca57c..00000000000
--- a/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Morphlines Solr related code.
- */
-package org.apache.solr.morphlines.solr;
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/java/overview.html b/solr/contrib/morphlines-core/src/java/overview.html
deleted file mode 100644
index 7f8ad137a34..00000000000
--- a/solr/contrib/morphlines-core/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-Apache Solr Search Server: Solr Core Morphline Commands
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/custom-mimetypes.xml b/solr/contrib/morphlines-core/src/test-files/custom-mimetypes.xml
deleted file mode 100644
index 6891e42d616..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/custom-mimetypes.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/log4j.properties b/solr/contrib/morphlines-core/src/test-files/log4j.properties
deleted file mode 100644
index 40fc92bbb4d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/log4j.properties
+++ /dev/null
@@ -1,7 +0,0 @@
-# Logging level
-log4j.rootLogger=INFO, CONSOLE
-
-log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
-log4j.appender.CONSOLE.Target=System.err
-log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
-log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
diff --git a/solr/contrib/morphlines-core/src/test-files/morphlines-core.marker b/solr/contrib/morphlines-core/src/test-files/morphlines-core.marker
deleted file mode 100644
index f4ed7be804a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/morphlines-core.marker
+++ /dev/null
@@ -1 +0,0 @@
-# Marker file, so we can lookup this file from classpath to get the resources folder for morphlines-core.
\ No newline at end of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml
deleted file mode 100644
index 3a9c58afee8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
deleted file mode 100644
index 2c09ebed669..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f913d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
deleted file mode 100644
index 722db588333..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa349a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_it.txt
deleted file mode 100644
index cac04095372..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642cc5a3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 441072971d3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b750845e3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829db6a2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2ae38..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65deafe1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097dac7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
deleted file mode 100644
index a3ff5fe122c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
deleted file mode 100644
index f7703841887..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f5bd6..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0b2a1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
deleted file mode 100644
index 2db14760075..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 25f1db93460..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 723641c6da7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
deleted file mode 100644
index addad798c4b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
deleted file mode 100644
index c00837ea939..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
deleted file mode 100644
index 9ff88d747e5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
deleted file mode 100644
index d8760b12c14..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 86286bb083b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1a96f1db6f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 60c1c50fbc8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
deleted file mode 100644
index 4617f83a5c5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
deleted file mode 100644
index 4cb5b0891b1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
deleted file mode 100644
index d4321be6b16..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
deleted file mode 100644
index e21a23c06c3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
deleted file mode 100644
index f4d61f5092c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
deleted file mode 100644
index e76f36e69ed..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
+++ /dev/null
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard , Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
deleted file mode 100644
index 276c1b446f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
+++ /dev/null
@@ -1,251 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
deleted file mode 100644
index 4fdee90a5ba..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
deleted file mode 100644
index 64307693457..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
+++ /dev/null
@@ -1,241 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
deleted file mode 100644
index 22bddfd8cb3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
+++ /dev/null
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
deleted file mode 100644
index 07f0fabe692..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
deleted file mode 100644
index 84d9408d4ea..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
deleted file mode 100644
index 6f0368e4d81..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# , ... , ... ,
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt
deleted file mode 100644
index 1dfc0abecbf..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/protwords.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xml
deleted file mode 100644
index 11c1f763bcc..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/schema.xml
+++ /dev/null
@@ -1,927 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml
deleted file mode 100644
index 05de0b628c9..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml
+++ /dev/null
@@ -1,1426 +0,0 @@
-
-
-
-
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 128
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.ulog.dir:}
-
-
-
-
- ${solr.autoCommit.maxTime:60000}
- false
-
-
-
-
-
- ${solr.autoSoftCommit.maxTime:1000}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
- 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
-
- textSpell
-
-
-
-
-
- default
- name
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt
deleted file mode 100644
index ae1e83eeb3d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt
deleted file mode 100644
index 7f72128303b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/currency.xml
deleted file mode 100644
index 3a9c58afee8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml
deleted file mode 100644
index 2c09ebed669..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f913d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
deleted file mode 100644
index 722db588333..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa349a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_it.txt
deleted file mode 100644
index cac04095372..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642cc5a3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 441072971d3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b750845e3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829db6a2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2ae38..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65deafe1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097dac7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
deleted file mode 100644
index a3ff5fe122c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
deleted file mode 100644
index f7703841887..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f5bd6..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0b2a1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
deleted file mode 100644
index 2db14760075..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 25f1db93460..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 723641c6da7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
deleted file mode 100644
index addad798c4b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
deleted file mode 100644
index c00837ea939..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
deleted file mode 100644
index 9ff88d747e5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
deleted file mode 100644
index d8760b12c14..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 86286bb083b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1a96f1db6f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 60c1c50fbc8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
deleted file mode 100644
index 4617f83a5c5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
deleted file mode 100644
index 4cb5b0891b1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
deleted file mode 100644
index d4321be6b16..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
deleted file mode 100644
index e21a23c06c3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
deleted file mode 100644
index f4d61f5092c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
deleted file mode 100644
index e76f36e69ed..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
+++ /dev/null
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard , Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
deleted file mode 100644
index 276c1b446f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
+++ /dev/null
@@ -1,251 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
deleted file mode 100644
index 4fdee90a5ba..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
deleted file mode 100644
index 64307693457..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
+++ /dev/null
@@ -1,241 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
deleted file mode 100644
index 22bddfd8cb3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
+++ /dev/null
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
deleted file mode 100644
index 07f0fabe692..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
deleted file mode 100644
index 84d9408d4ea..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
deleted file mode 100644
index 6f0368e4d81..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# , ... , ... ,
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/protwords.txt
deleted file mode 100644
index 1dfc0abecbf..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/protwords.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/schema.xml
deleted file mode 100644
index 1dee871aca7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/schema.xml
+++ /dev/null
@@ -1,941 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/solrconfig.xml
deleted file mode 100644
index 9d298d43d12..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/solrconfig.xml
+++ /dev/null
@@ -1,1446 +0,0 @@
-
-
-
-
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
- ${solr.hdfs.home:}
- ${solr.hdfs.confdir:}
- ${solr.hdfs.blockcache.enabled:true}
- ${solr.hdfs.blockcache.slab.count:1}
- ${solr.hdfs.blockcache.direct.memory.allocation:true}
- ${solr.hdfs.blockcache.blocksperbank:16384}
- ${solr.hdfs.blockcache.read.enabled:true}
- ${solr.hdfs.nrtcachingdirectory.enable:true}
- ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
- ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
- ${solr.hdfs.blockcache.global:false}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 128
-
-
-
-
-
-
-
-
-
- ${solr.lock.type:hdfs}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.ulog.dir:}
-
-
-
-
- ${solr.autoCommit.maxTime:60000}
- false
-
-
-
-
- ${solr.autoSoftCommit.maxTime:1000}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
- 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
-
- text_general
-
-
-
-
-
- default
- text
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
- false
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/stopwords.txt
deleted file mode 100644
index ae1e83eeb3d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/stopwords.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/synonyms.txt
deleted file mode 100644
index 7f72128303b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/synonyms.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/solr.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/solr.xml
deleted file mode 100644
index 2fa9a8a3cde..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/solr.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
-
-
-
-
- ${shareSchema:false}
-
-
- 127.0.0.1
- ${hostPort:8983}
- ${hostContext:solr}
- ${genericCoreNodeNames:true}
- 0
- ${distribUpdateSoTimeout:120000}
- ${distribUpdateConnTimeout:15000}
- ${solr.zkclienttimeout:30000}
-
-
-
- ${urlScheme:}
- ${socketTimeout:120000}
- ${connTimeout:15000}
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/currency.xml
deleted file mode 100644
index 3a9c58afee8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml
deleted file mode 100644
index 2c09ebed669..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f913d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt
deleted file mode 100644
index 722db588333..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa349a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_it.txt
deleted file mode 100644
index cac04095372..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642cc5a3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 441072971d3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b750845e3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829db6a2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2ae38..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65deafe1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097dac7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
deleted file mode 100644
index a3ff5fe122c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
deleted file mode 100644
index f7703841887..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f5bd6..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0b2a1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
deleted file mode 100644
index 2db14760075..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 25f1db93460..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 723641c6da7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
deleted file mode 100644
index addad798c4b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
deleted file mode 100644
index c00837ea939..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
deleted file mode 100644
index 9ff88d747e5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
deleted file mode 100644
index d8760b12c14..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 86286bb083b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1a96f1db6f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 60c1c50fbc8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
deleted file mode 100644
index 4617f83a5c5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
deleted file mode 100644
index 4cb5b0891b1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
deleted file mode 100644
index d4321be6b16..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
deleted file mode 100644
index e21a23c06c3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
deleted file mode 100644
index f4d61f5092c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt
deleted file mode 100644
index e76f36e69ed..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt
+++ /dev/null
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard , Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt
deleted file mode 100644
index 276c1b446f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt
+++ /dev/null
@@ -1,251 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt
deleted file mode 100644
index 4fdee90a5ba..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt
deleted file mode 100644
index 64307693457..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt
+++ /dev/null
@@ -1,241 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt
deleted file mode 100644
index 22bddfd8cb3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt
+++ /dev/null
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt
deleted file mode 100644
index 07f0fabe692..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt
deleted file mode 100644
index 84d9408d4ea..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt
deleted file mode 100644
index 6f0368e4d81..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# , ... , ... ,
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/protwords.txt
deleted file mode 100644
index 1dfc0abecbf..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/protwords.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/schema.xml
deleted file mode 100644
index bb2f803ba8d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/schema.xml
+++ /dev/null
@@ -1,940 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/solrconfig.xml
deleted file mode 100644
index 72e2add8547..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/solrconfig.xml
+++ /dev/null
@@ -1,1449 +0,0 @@
-
-
-
-
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
- ${solr.hdfs.home:}
- ${solr.hdfs.confdir:}
- ${solr.hdfs.security.kerberos.enabled:false}
- ${solr.hdfs.security.kerberos.keytabfile:}
- ${solr.hdfs.security.kerberos.principal:}
- ${solr.hdfs.blockcache.enabled:true}
- ${solr.hdfs.blockcache.slab.count:1}
- ${solr.hdfs.blockcache.direct.memory.allocation:true}
- ${solr.hdfs.blockcache.blocksperbank:16384}
- ${solr.hdfs.blockcache.read.enabled:true}
- ${solr.hdfs.nrtcachingdirectory.enable:true}
- ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
- ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 128
-
-
-
-
-
-
-
-
-
- ${solr.lock.type:hdfs}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.autoCommit.maxTime:60000}
- false
-
-
-
-
- ${solr.autoSoftCommit.maxTime:1000}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
- 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
-
- text_general
-
-
-
-
-
- default
- text
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
- false
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/stopwords.txt
deleted file mode 100644
index ae1e83eeb3d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/stopwords.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/synonyms.txt
deleted file mode 100644
index 7f72128303b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/synonyms.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/solr.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/solr.xml
deleted file mode 100644
index 8c1509aabc4..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/solr.xml
+++ /dev/null
@@ -1,45 +0,0 @@
-
-
-
-
-
-
- ${shareSchema:false}
-
-
- ${hostPort:8983}
- ${hostPort:8983}
- ${solr.zkclienttimeout:30000}
- ${genericCoreNodeNames:true}
- 0
- ${distribUpdateConnTimeout:15000}
- ${distribUpdateSoTimeout:120000}
-
-
-
- ${urlScheme:}
- ${socketTimeout:120000}
- ${connTimeout:15000}
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solr.xml b/solr/contrib/morphlines-core/src/test-files/solr/solr.xml
deleted file mode 100644
index e67eb1d4054..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solr.xml
+++ /dev/null
@@ -1,34 +0,0 @@
-
-
-
-
-
-
- ${hostPort:8983}
- ${hostContext:solr}
- ${solr.zkclienttimeout:30000}
- 127.0.0.1
-
-
-
- ${urlScheme:}
- ${socketTimeout:120000}
- ${connTimeout:15000}
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/currency.xml
deleted file mode 100644
index 3a9c58afee8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
deleted file mode 100644
index 2c09ebed669..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f913d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt
deleted file mode 100644
index 722db588333..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa349a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt
deleted file mode 100644
index cac04095372..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642cc5a3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 441072971d3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b750845e3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829db6a2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2ae38..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65deafe1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097dac7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt
deleted file mode 100644
index a3ff5fe122c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt
deleted file mode 100644
index f7703841887..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f5bd6..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0b2a1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt
deleted file mode 100644
index 2db14760075..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 25f1db93460..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 723641c6da7..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt
deleted file mode 100644
index addad798c4b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt
deleted file mode 100644
index c00837ea939..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt
deleted file mode 100644
index 9ff88d747e5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt
deleted file mode 100644
index d8760b12c14..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 86286bb083b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1a96f1db6f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 60c1c50fbc8..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt
deleted file mode 100644
index 4617f83a5c5..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt
deleted file mode 100644
index 4cb5b0891b1..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt
deleted file mode 100644
index d4321be6b16..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt
deleted file mode 100644
index e21a23c06c3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt
deleted file mode 100644
index f4d61f5092c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt
deleted file mode 100644
index e76f36e69ed..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt
+++ /dev/null
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard , Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt
deleted file mode 100644
index 276c1b446f2..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt
+++ /dev/null
@@ -1,251 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt
deleted file mode 100644
index 4fdee90a5ba..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt
deleted file mode 100644
index 64307693457..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt
+++ /dev/null
@@ -1,241 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt
deleted file mode 100644
index 22bddfd8cb3..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt
+++ /dev/null
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt
deleted file mode 100644
index 07f0fabe692..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt
deleted file mode 100644
index 84d9408d4ea..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt
deleted file mode 100644
index 6f0368e4d81..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# , ... , ... ,
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt
deleted file mode 100644
index 1dfc0abecbf..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/schema.xml
deleted file mode 100644
index aed9f441fd6..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/schema.xml
+++ /dev/null
@@ -1,893 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml
deleted file mode 100644
index 05de0b628c9..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml
+++ /dev/null
@@ -1,1426 +0,0 @@
-
-
-
-
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 128
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.ulog.dir:}
-
-
-
-
- ${solr.autoCommit.maxTime:60000}
- false
-
-
-
-
-
- ${solr.autoSoftCommit.maxTime:1000}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
- 4
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
-
- textSpell
-
-
-
-
-
- default
- name
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt
deleted file mode 100644
index ae1e83eeb3d..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt
deleted file mode 100644
index 7f72128303b..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcloud/conf/solrconfig.xml
deleted file mode 100644
index 42ab0565f07..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcloud/conf/solrconfig.xml
+++ /dev/null
@@ -1,1437 +0,0 @@
-
-
-
-
-
-
-
-
- ${tests.luceneMatchVersion:LATEST}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
- ${solr.hdfs.home:}
- ${solr.hdfs.confdir:}
- ${solr.hdfs.security.kerberos.enabled:false}
- ${solr.hdfs.security.kerberos.keytabfile:}
- ${solr.hdfs.security.kerberos.principal:}
- ${solr.hdfs.blockcache.enabled:true}
- ${solr.hdfs.blockcache.slab.count:1}
- ${solr.hdfs.blockcache.direct.memory.allocation:true}
- ${solr.hdfs.blockcache.blocksperbank:16384}
- ${solr.hdfs.blockcache.read.enabled:true}
- ${solr.hdfs.nrtcachingdirectory.enable:true}
- ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
- ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
- ${solr.hdfs.blockcache.global:false}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 128
-
-
-
-
-
-
-
-
-
- ${solr.lock.type:hdfs}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.ulog.dir:}
-
-
-
-
- ${solr.autoCommit.maxTime:60000}
- false
-
-
-
-
- ${solr.autoSoftCommit.maxTime:1000}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
- text_general
-
-
-
-
-
- default
- text
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
- false
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/NullHeader.docx b/solr/contrib/morphlines-core/src/test-files/test-documents/NullHeader.docx
deleted file mode 100644
index cc62b8d6beb..00000000000
Binary files a/solr/contrib/morphlines-core/src/test-files/test-documents/NullHeader.docx and /dev/null differ
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/boilerplate.html b/solr/contrib/morphlines-core/src/test-files/test-documents/boilerplate.html
deleted file mode 100644
index 615f84e85ee..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/test-documents/boilerplate.html
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-
-
-
-
-
- Title
-
-
-
-
-
-This is the real meat of the page,
-and represents the text we want.
-It has lots of juicy content.
-
-We assume that it won't get filtered out.
-And that all of the lines will be in the
-output.
-
-
-
-Here's another paragraph of text.
-This is the end of the text.
-
-
-footer
-
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv b/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv
deleted file mode 100644
index 8f1f9e1ae6a..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-Age,Color,Extras,Type,Used
-2,blue,GPS,"Gas, with electric",""
-10,green,"Labeled ""Vintage, 1913""",,yes
-100,red,"Labeled ""Vintage 1913""",yes
-5,orange,none,"This is a
-multi, line text",no
\ No newline at end of file
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv.gz b/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv.gz
deleted file mode 100644
index ee2a951eddc..00000000000
Binary files a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.csv.gz and /dev/null differ
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.tar.gz b/solr/contrib/morphlines-core/src/test-files/test-documents/cars.tar.gz
deleted file mode 100644
index 5ca3cf1cdcb..00000000000
Binary files a/solr/contrib/morphlines-core/src/test-files/test-documents/cars.tar.gz and /dev/null differ
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/complex.mbox b/solr/contrib/morphlines-core/src/test-files/test-documents/complex.mbox
deleted file mode 100644
index 27f7017d265..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/test-documents/complex.mbox
+++ /dev/null
@@ -1,291 +0,0 @@
-From core-user-return-14700-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 04:28:28 2009
-Return-Path:
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 19921 invoked from network); 1 Jun 2009 04:28:28 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 04:28:28 -0000
-Received: (qmail 84995 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 84895 invoked by uid 500); 1 Jun 2009 04:28:38 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help:
-List-Unsubscribe:
-List-Post:
-List-Id:
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 84885 invoked by uid 99); 1 Jun 2009 04:28:38 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:38 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
- tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [69.147.107.21] (HELO mrout2-b.corp.re1.wahoo.com) (69.147.107.21)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:26 +0000
-Received: from SNV-EXPF01.ds.corp.wahoo.com (snv-expf01.ds.corp.wahoo.com [207.126.227.250])
- by mrout2-b.corp.re1.wahoo.com (8.13.8/8.13.8/y.out) with ESMTP id n514QYA6099963
- for ; Sun, 31 May 2009 21:26:35 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns;
- h=received:user-agent:date:subject:from:to:message-id:
- thread-topic:thread-index:in-reply-to:mime-version:content-type:
- content-transfer-encoding:x-originalarrivaltime;
- b=YVtSNdgjeeSBS1yY3XDolul49i+HrgNG7QszMo9LzGnrwejjgsl5+iUM6EiQgEpV
-Received: from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXPF01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
- Sun, 31 May 2009 21:26:34 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon, 1 Jun 2009 04:26:33 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 09:56:31 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Sam Judgement
-To:
-Message-ID:
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnicSNoBw19cMU8UEaXwAdZ1YYhuw==
-In-Reply-To: <440622.41041.qm@web111005.mail.gq1.wahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
- charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 04:26:34.0501 (UTC) FILETIME=[257EAB50:01C9E271]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Sam
-
-
-On 5/30/09 10:00 AM, "Jianmin Foo" wrote:
-
-> Hi,
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
->
-> Thanks,
-> Jianmin
->
->
->
->
-
-
-From core-user-return-14701-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 05:31:14 2009
-Return-Path:
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 38243 invoked from network); 1 Jun 2009 05:31:14 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 05:31:14 -0000
-Received: (qmail 15621 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 15557 invoked by uid 500); 1 Jun 2009 05:31:24 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help:
-List-Unsubscribe:
-List-Post:
-List-Id:
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 15547 invoked by uid 99); 1 Jun 2009 05:31:24 -0000
-Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 05:31:24 +0000
-X-ASF-Spam-Status: No, hits=2.2 required=10.0
- tests=HTML_MESSAGE,SPF_PASS
-X-Spam-Check-By: apache.org
-Received-SPF: pass (nike.apache.org: local policy)
-Received: from [68.142.237.94] (HELO n9.bullet.re3.wahoo.com) (68.142.237.94)
- by apache.org (qpsmtpd/0.29) with SMTP; Mon, 01 Jun 2009 05:31:11 +0000
-Received: from [68.142.237.88] by n9.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:50 -0000
-Received: from [67.195.9.82] by t4.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [67.195.9.99] by t2.bullet.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000
-Received: from [127.0.0.1] by omp103.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:28:01 -0000
-X-wahoo-Newman-Property: ymail-3
-X-wahoo-Newman-Id: 796121.97519.bm@omp103.mail.gq1.wahoo.com
-Received: (qmail 35264 invoked by uid 60001); 1 Jun 2009 05:30:49 -0000
-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=wahoo.com; s=s1024; t=1243834249; bh=R8qzdi/IbLyO8UwpnaujDpT9E+6bJ7nkmZN2803EmRk=; h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; b=vq4c6RIDbkuLPYd8mirusIXf6DqTb/IeT55In7W00Y5Sxx1ZiXBb78yE9+TDfXJ0elsEZvqv4ocyvolGE0eGtyYeJA0mZikpRNu6pidxPNpCplOcLHBRz7YQ7iERwv3TagRlWy2Xd3oD9ZeV0A05P7WUOiNNX1PUUJD1IVdrEZo=
-DomainKey-Signature:a=rsa-sha1; q=dns; c=nofws;
- s=s1024; d=wahoo.com;
- h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type;
- b=6HXZV98ON5vBwmE/xS8stVD0D2F4dkMY7a0suX5KVTb736JdR8G59mqBq/dWcpbFTLiCLtxi18LMb/dU1RKRGOEdn3l3j/jKXhBrhIgfg3qtNskPedXDKBvn7JGXiSkqpA/tUtPjvc0Uuk8/LaA01SQTz40Engg7nD8/EJdIAhA=;
-Message-ID: <592088.35091.qm@web111010.mail.gq1.wahoo.com>
-X-YMail-OSG: KzhhrJYVM1m.MCS6vRpRP2ZZO2PrfnbngosELDCIa91ZqvhJph4RdmzfUW0jw9W04RCSch1K730bPohwNpNBIk2QR_zt4_mfbhfq7YEPkSoz9LSXG90P9vIo5Fc8qyZN0U6vA9gtdyGQTpN5ahvillUH9nAF0TMWv2SvZJLjPlQ0Z0p8oK8ltBwGTgLrM8Jtdn9D29yoRyi3_EpVOfdD9OP.EK50Vr1XwSUYMbnpZ0WGHMwd.Yig7A6Elwadm3YVbfOdx2mfrG.jQsUAxQjRBNvbrOM57.FaE11kHTe9aoBWSeihNg--
-Received: from [216.145.54.7] by web111010.mail.gq1.wahoo.com via HTTP; Sun, 31 May 2009 22:30:49 PDT
-X-Mailer: wahooMailRC/1277.43 wahooMailWebService/0.7.289.10
-References:
-Date: Sun, 31 May 2009 22:30:49 -0700 (PDT)
-From: Jianmin Foo
-Subject: Re: question about when shuffle/sort start working
-To: core-user@hadoop.apache.org
-In-Reply-To:
-MIME-Version: 1.0
-Content-Type: multipart/alternative; boundary="0-1193839393-1243834249=:35091"
-X-Virus-Checked: Checked by ClamAV on apache.org
-
---0-1193839393-1243834249=:35091
-Content-Type: text/plain; charset=us-ascii
-
-Thanks a lot for your explanation, Sam.
-
-So is this event generated by hadoop framework? Is there any API in mapper to fire this event? Actually, I am thinking to implement a mapper that will emit some pairs, then fire this event to let the reducer works, the same mapper task then emit some other pairs and repeat. Do you think is this logic feasible by current API?
-
-Thanks,
-Jianmin
-
-
-
-
-
-________________________________
-From: Sam Judgement
-To: core-user@hadoop.apache.org
-Sent: Monday, June 1, 2009 12:26:31 PM
-Subject: Re: question about when shuffle/sort start working
-
-When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-fetch map outputs for a given map only on the receipt of such events.
-
-Sam
-
-
-On 5/30/09 10:00 AM, "Jianmin Foo" wrote:
-
-> Hi,
-> I am being confused by the protocol between mapper and reducer. When mapper
-> emitting the (key,value) pair done, is there any signal the mapper send out to
-> hadoop framework in protocol to indicate that map is done and the shuffle/sort
-> can begin for reducer? If there is no this signal in protocol, when the
-> framework begin the shuffle/sort?
->
-> Thanks,
-> Jianmin
->
->
->
->
-
-
-
---0-1193839393-1243834249=:35091--
-
-
-From core-user-return-14702-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 06:04:30 2009
-Return-Path:
-Delivered-To: apmail-hadoop-core-user-archive@www.apache.org
-Received: (qmail 53387 invoked from network); 1 Jun 2009 06:04:29 -0000
-Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
- by minotaur.apache.org with SMTP; 1 Jun 2009 06:04:29 -0000
-Received: (qmail 39066 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org
-Received: (qmail 38970 invoked by uid 500); 1 Jun 2009 06:04:39 -0000
-Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm
-Precedence: bulk
-List-Help:
-List-Unsubscribe:
-List-Post:
-List-Id:
-Reply-To: core-user@hadoop.apache.org
-Delivered-To: mailing list core-user@hadoop.apache.org
-Received: (qmail 38955 invoked by uid 99); 1 Jun 2009 06:04:39 -0000
-Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:39 +0000
-X-ASF-Spam-Status: No, hits=1.2 required=10.0
- tests=SPF_NEUTRAL
-X-Spam-Check-By: apache.org
-Received-SPF: neutral (athena.apache.org: local policy)
-Received: from [216.145.54.172] (HELO mrout2.wahoo.com) (216.145.54.172)
- by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:28 +0000
-Received: from SNV-EXBH01.ds.corp.wahoo.com (snv-exbh01.ds.corp.wahoo.com [207.126.227.249])
- by mrout2.wahoo.com (8.13.6/8.13.6/y.out) with ESMTP id n5163FGq038852
- for ; Sun, 31 May 2009 23:03:15 -0700 (PDT)
-DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns;
- h=received:user-agent:date:subject:from:to:message-id:
- thread-topic:thread-index:in-reply-to:mime-version:content-type:
- content-transfer-encoding:x-originalarrivaltime;
- b=rChE4SCnwtWaZpjhovkiXDKfDiVNdRRvsadSGG9S9bgvOexn/9/5JjEQx1pOR7Nb
-Received: from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXBH01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959);
- Sun, 31 May 2009 23:03:15 -0700
-Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ;
- Mon, 1 Jun 2009 06:03:15 +0000
-User-Agent: Microsoft-Entourage/12.17.0.090302
-Date: Mon, 01 Jun 2009 11:33:13 +0530
-Subject: Re: question about when shuffle/sort start working
-From: Sam Judgement
-To:
-Message-ID:
-Thread-Topic: question about when shuffle/sort start working
-Thread-Index: AcnifqWrLG6N7GAk7kqy9QalVWfegQ==
-In-Reply-To: <592088.35091.qm@web111010.mail.gq1.wahoo.com>
-Mime-version: 1.0
-Content-type: text/plain;
- charset="US-ASCII"
-Content-transfer-encoding: 7bit
-X-OriginalArrivalTime: 01 Jun 2009 06:03:15.0462 (UTC) FILETIME=[A7231260:01C9E27E]
-X-Virus-Checked: Checked by ClamAV on apache.org
-
-
-No you cannot raise this event yourself, this event is generated internally
-by the framework.
-
-I am guessing that what you probably want is to have a chain of MapReduce
-Jobs where the output of one is automatically fed as input to another. You
-can look at these classes: JobControl and ChainMapper/ChainReducer.
-
-Sam
-
-On 6/1/09 11:00 AM, "Jianmin Foo" wrote:
-
-> Thanks a lot for your explanation, Sam.
->
-> So is this event generated by hadoop framework? Is there any API in mapper to
-> fire this event? Actually, I am thinking to implement a mapper that will emit
-> some pairs, then fire this event to let the reducer works, the
-> same mapper task then emit some other pairs and repeat. Do you
-> think is this logic feasible by current API?
->
-> Thanks,
-> Jianmin
->
->
->
->
->
-> ________________________________
-> From: Sam Judgement
-> To: core-user@hadoop.apache.org
-> Sent: Monday, June 1, 2009 12:26:31 PM
-> Subject: Re: question about when shuffle/sort start working
->
-> When a Mapper completes, MapCompletionEvents are generated. Reducers try to
-> fetch map outputs for a given map only on the receipt of such events.
->
-> Sam
->
->
-> On 5/30/09 10:00 AM, "Jianmin Foo" wrote:
->
->> Hi,
->> I am being confused by the protocol between mapper and reducer. When mapper
->> emitting the (key,value) pair done, is there any signal the mapper send out
->> to
->> hadoop framework in protocol to indicate that map is done and the
->> shuffle/sort
->> can begin for reducer? If there is no this signal in protocol, when the
->> framework begin the shuffle/sort?
->>
->> Thanks,
->> Jianmin
->>
->>
->>
->>
->
->
->
-
-
diff --git a/solr/contrib/morphlines-core/src/test-files/test-documents/email.eml b/solr/contrib/morphlines-core/src/test-files/test-documents/email.eml
deleted file mode 100644
index d45f430869c..00000000000
--- a/solr/contrib/morphlines-core/src/test-files/test-documents/email.eml
+++ /dev/null
@@ -1,40 +0,0 @@
-MIME-Version: 1.0
-Received: by 10.216.199.5 with HTTP; Wed, 27 Nov 2013 12:01:23 -0800
-(PST)
-Date: Wed, 27 Nov 2013 13:01:23 -0700
-Delivered-To: foo@cloudera.com
-Message-ID:
-
-Subject: Test EML
-From: Patrick Foo
-To: Patrick Foo
-Content-Type: multipart/alternative;
-boundary=001a11c3815cb55dda04ec2e0f3b
-
---001a11c3815cb55dda04ec2e0f3b
-Content-Type: text/plain; charset=ISO-8859-1
-
-This is a test
-
---
-Patrick Foo
-Customer Operations Engineer
-
-
-
---001a11c3815cb55dda04ec2e0f3b
-Content-Type: text/html; charset=ISO-8859-1
-Content-Transfer-Encoding: quoted-printable
-
-This is a test
--
-
Patrick Foo
Customer Operations
-Engineer
=
-