+
+
@@ -53,6 +55,8 @@
+
+
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index a71f538dc51..a340d2bbdae 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -2,23 +2,63 @@
# Blank lines, comment lines, and keys that aren't in /org/name format are ignored
# when the lexical sort check is performed by the ant check-lib-versions target.
+
+/aopalliance/aopalliance = 1.0
+/asm/asm = 3.1
/cglib/cglib-nodep = 2.2
+/com.adobe.xmp/xmpcore = 5.1.2
com.carrotsearch.randomizedtesting.version = 2.0.13
/com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version}
/com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version}
/com.carrotsearch/hppc = 0.5.2
+
+com.cloudera.cdk.cdk-morphlines.version = 0.8.1
+/com.cloudera.cdk/cdk-morphlines-avro = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-core = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-hadoop-sequencefile = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-json = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-saxon = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-tika-core = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-tika-decompress = ${com.cloudera.cdk.cdk-morphlines.version}
+/com.cloudera.cdk/cdk-morphlines-twitter = ${com.cloudera.cdk.cdk-morphlines.version}
+
+com.codahale.metrics.version = 3.0.1
+/com.codahale.metrics/metrics-core = ${com.codahale.metrics.version}
+/com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version}
+
/com.cybozu.labs/langdetect = 1.1-20120112
/com.drewnoakes/metadata-extractor = 2.6.2
+
+com.fasterxml.jackson.core.version = 2.2.3
+/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version}
+/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version}
+/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version}
+
/com.google.guava/guava = 14.0.1
+
+com.google.inject.guice.version = 3.0
+/com.google.inject.extensions/guice-servlet = ${com.google.inject.guice.version}
+/com.google.inject/guice = ${com.google.inject.guice.version}
+
/com.google.protobuf/protobuf-java = 2.5.0
/com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru = 1.2
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.0-RC-1
/com.ibm.icu/icu4j = 49.1
/com.spatial4j/spatial4j = 0.3
-/com.sun.jersey/jersey-core = 1.16
+
+com.sun.jersey.version = 1.8
+/com.sun.jersey.contribs/jersey-guice = ${com.sun.jersey.version}
+/com.sun.jersey/jersey-bundle = ${com.sun.jersey.version}
+/com.sun.jersey/jersey-core = ${com.sun.jersey.version}
+/com.sun.jersey/jersey-json = ${com.sun.jersey.version}
+/com.sun.jersey/jersey-server = ${com.sun.jersey.version}
+
+/com.sun.xml.bind/jaxb-impl = 2.2.2
+/com.thoughtworks.paranamer/paranamer = 2.3
+/com.typesafe/config = 1.0.2
/commons-beanutils/commons-beanutils = 1.7.0
/commons-cli/commons-cli = 1.2
/commons-codec/commons-codec = 1.7
@@ -33,8 +73,10 @@ com.carrotsearch.randomizedtesting.version = 2.0.13
/dom4j/dom4j = 1.6.1
/edu.ucar/netcdf = 4.2-min
/hsqldb/hsqldb = 1.8.0.10
+/io.netty/netty = 3.6.2.Final
/jakarta-regexp/jakarta-regexp = 1.4
/javax.activation/activation = 1.1
+/javax.inject/javax.inject = 1
/javax.mail/mail = 1.4.1
/javax.servlet/javax.servlet-api = 3.0.1
/javax.servlet/servlet-api = 2.4
@@ -45,9 +87,12 @@ com.carrotsearch.randomizedtesting.version = 2.0.13
/mecab/mecab-ipadic = 2.7.0-20070801
/mecab/mecab-naist-jdic = 0.6.3b-20111013
/net.arnx/jsonic = 1.2.7
+/net.sf.saxon/Saxon-HE = 9.5.1-2
+/net.sourceforge.argparse4j/argparse4j = 0.4.0
/net.sourceforge.nekohtml/nekohtml = 1.9.17
/org.antlr/antlr-runtime = 3.5
/org.apache.ant/ant = 1.8.2
+/org.apache.avro/avro = 1.7.4
/org.apache.commons/commons-compress = 1.4.1
/org.apache.derby/derby = 10.9.1.0
@@ -57,18 +102,35 @@ org.apache.hadoop.version = 2.2.0
/org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version}
/org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-app = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-common = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-core = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-hs = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-jobclient = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-mapreduce-client-shuffle = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-api = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-client = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-common = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server-common = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server-nodemanager = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server-resourcemanager = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server-tests = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-yarn-server-web-proxy = ${org.apache.hadoop.version}
+
# The httpcore version is often different from the httpclient and httpmime versions,
# so the httpcore version value should not share the same symbolic name with them.
/org.apache.httpcomponents/httpclient = 4.2.6
/org.apache.httpcomponents/httpcore = 4.2.5
/org.apache.httpcomponents/httpmime = 4.2.6
-org.apache.james.apache.mime4j = 0.7.2
-/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j}
-/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j}
+org.apache.james.apache.mime4j.version = 0.7.2
+/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
+/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version}
/org.apache.mahout/mahout-collections = 1.0
/org.apache.mahout/mahout-math = 0.6
+/org.apache.mrunit/mrunit = 1.0.0
org.apache.pdfbox.version = 1.8.1
/org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
@@ -84,6 +146,7 @@ org.apache.poi.version = 3.9
org.apache.tika.version = 1.4
/org.apache.tika/tika-core = ${org.apache.tika.version}
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
+/org.apache.tika/tika-xmp = ${org.apache.tika.version}
org.apache.uima.version = 2.3.1
/org.apache.uima/AlchemyAPIAnnotator = ${org.apache.uima.version}
@@ -96,6 +159,7 @@ org.apache.uima.version = 2.3.1
/org.apache.velocity/velocity-tools = 2.0
/org.apache.xmlbeans/xmlbeans = 2.3.0
/org.apache.zookeeper/zookeeper = 3.4.5
+/org.aspectj/aspectjrt = 1.6.11
org.bouncycastle.version = 1.45
/org.bouncycastle/bcmail-jdk15 = ${org.bouncycastle.version}
@@ -111,8 +175,9 @@ org.carrot2.morfologik.version = 1.7.1
/org.ccil.cowan.tagsoup/tagsoup = 1.2.1
-org.codehaus.jackson.version = 1.7.4
+org.codehaus.jackson.version = 1.9.13
/org.codehaus.jackson/jackson-core-asl = ${org.codehaus.jackson.version}
+/org.codehaus.jackson/jackson-jaxrs = ${org.codehaus.jackson.version}
/org.codehaus.jackson/jackson-mapper-asl = ${org.codehaus.jackson.version}
/org.codehaus.woodstox/wstx-asl = 3.2.7
@@ -137,6 +202,8 @@ org.gagravarr.vorbis.java.version = 0.1
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
+/org.mockito/mockito-core = 1.9.5
+
org.mortbay.jetty.version = 6.1.26
/org.mortbay.jetty/jetty = ${org.mortbay.jetty.version}
/org.mortbay.jetty/jetty-util = ${org.mortbay.jetty.version}
@@ -161,5 +228,6 @@ org.slf4j.version = 1.6.6
/org.slf4j/slf4j-log4j12 = ${org.slf4j.version}
/org.tukaani/xz = 1.0
+/org.xerial.snappy/snappy-java = 1.0.4.1
/rome/rome = 0.9
/xerces/xercesImpl = 2.9.1
diff --git a/lucene/tools/custom-tasks.xml b/lucene/tools/custom-tasks.xml
index e17480b1d3c..e38b0b137a4 100644
--- a/lucene/tools/custom-tasks.xml
+++ b/lucene/tools/custom-tasks.xml
@@ -45,7 +45,7 @@
-
+
diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy
index 0933cab39bf..b1c4311e3b0 100644
--- a/lucene/tools/junit4/tests.policy
+++ b/lucene/tools/junit4/tests.policy
@@ -63,6 +63,7 @@ grant {
permission javax.security.auth.PrivateCredentialPermission "org.apache.hadoop.security.Credentials * \"*\"", "read";
permission java.security.SecurityPermission "putProviderProperty.SaslPlainServer";
permission java.security.SecurityPermission "insertProvider.SaslPlainServer";
+ permission javax.xml.bind.JAXBPermission "setDatatypeConverter";
// TIKA uses BouncyCastle and that registers new provider for PDF parsing + MSOffice parsing. Maybe report as bug!
permission java.security.SecurityPermission "putProviderProperty.BC";
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9189b01c1d7..1e5f40d38a9 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -44,6 +44,15 @@ Upgrading from Solr 4.x
Detailed Change List
----------------------
+New Features
+----------------------
+
+* SOLR-1301: Add a Solr contrib that allows for building Solr indexes via
+ Hadoop's MapReduce. (Matt Revelle, Alexander Kanarsky, Steve Rowe,
+ Mark Miller, Greg Bowyer, Jason Rutherglen, Kris Jirapinyo, Jason Venner,
+ Andrzej Bialecki, Patrick Hunt, Wolfgang Hoschek, Roman Shaposhnik,
+ Eric Wong)
+
Other Changes
----------------------
diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml
index 40e5201f60d..263c48832c9 100644
--- a/solr/contrib/extraction/ivy.xml
+++ b/solr/contrib/extraction/ivy.xml
@@ -22,6 +22,7 @@
+
@@ -44,12 +45,19 @@
+
+
+
+
+
+
+
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java
index c91dd47306f..acf94a2d801 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java
@@ -1,4 +1,4 @@
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
diff --git a/solr/contrib/solr-morphlines-cell/build.xml b/solr/contrib/solr-morphlines-cell/build.xml
new file mode 100644
index 00000000000..e0da709634a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/build.xml
@@ -0,0 +1,143 @@
+
+
+
+
+
+
+
+ Solr Cell Morphline commands.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/ivy.xml b/solr/contrib/solr-morphlines-cell/ivy.xml
new file mode 100644
index 00000000000..ee652bd8363
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/ivy.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
new file mode 100644
index 00000000000..8d5873fe4e3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
@@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.cell;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.params.MultiMapSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.DateUtil;
+import org.apache.solr.handler.extraction.ExtractingParams;
+import org.apache.solr.handler.extraction.SolrContentHandler;
+import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
+import org.apache.solr.morphlines.solr.SolrLocator;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.sax.xpath.Matcher;
+import org.apache.tika.sax.xpath.MatchingContentHandler;
+import org.apache.tika.sax.xpath.XPathParser;
+import org.apache.xml.serialize.OutputFormat;
+import org.apache.xml.serialize.XMLSerializer;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.CommandBuilder;
+import com.cloudera.cdk.morphline.api.MorphlineCompilationException;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.MorphlineRuntimeException;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.stdio.AbstractParser;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.io.Closeables;
+import com.typesafe.config.Config;
+
+/**
+ * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
+ * the Tika output back to a record using SolrCell.
+ *
+ * The Tika parser is chosen from the configurable list of parsers, depending on the MIME type
+ * specified in the input record. Typically, this requires an upstream DetectMimeTypeBuilder
+ * in a prior command.
+ */
+public final class SolrCellBuilder implements CommandBuilder {
+
+ @Override
+ public Collection getNames() {
+   // This builder reports exactly one command name.
+   final String commandName = "solrCell";
+   return Collections.singletonList(commandName);
+ }
+
+ @Override
+ public Command build(Config config, Command parent, Command child, MorphlineContext context) {
+ return new SolrCell(config, parent, child, context);
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ // Nested classes:
+ ///////////////////////////////////////////////////////////////////////////////
+ private static final class SolrCell extends AbstractParser {
+
+ private final IndexSchema schema;
+ private final List dateFormats;
+ private final String xpathExpr;
+ private final List parsers = new ArrayList();
+ private final SolrContentHandlerFactory solrContentHandlerFactory;
+
+ private final SolrParams solrParams;
+ private final Map mediaTypeToParserMap;
+
+ private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
+
+ public static final String ADDITIONAL_SUPPORTED_MIME_TYPES = "additionalSupportedMimeTypes";
+
+ /**
+ * Builds the command from its morphline configuration: obtains the Solr schema through the
+ * "solrLocator" section, collects the optional ExtractingParams settings, instantiates the
+ * content handler factory and the configured Tika parsers, and fills the MIME type to parser
+ * lookup table.
+ *
+ * Throws MorphlineCompilationException if the content handler factory class cannot be found
+ * or a configured parser cannot be instantiated or is not a Tika Parser.
+ */
+ public SolrCell(Config config, Command parent, Command child, MorphlineContext context) {
+ super(config, parent, child, context);
+
+ // Resolve the index schema via the configured Solr locator; a missing schema is a hard error.
+ Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
+ SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
+ LOG.debug("solrLocator: {}", locator);
+ this.schema = locator.getIndexSchema();
+ Preconditions.checkNotNull(schema);
+ LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values()));
+
+ // Collect the optional ExtractingParams settings; only values present in the config are added.
+ ListMultimap cellParams = ArrayListMultimap.create();
+ String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null);
+ if (uprefix != null) {
+ cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix);
+ }
+ for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.EMPTY_LIST)) {
+ cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture);
+ }
+ // Each entry of the optional "fmap" section becomes an ExtractingParams.MAP_PREFIX + key parameter.
+ Config fmapConfig = getConfigs().getConfig(config, "fmap", null);
+ if (fmapConfig != null) {
+ for (Map.Entry entry : fmapConfig.root().unwrapped().entrySet()) {
+ cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString());
+ }
+ }
+ String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null);
+ if (captureAttributes != null) {
+ cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes);
+ }
+ String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null);
+ if (lowerNames != null) {
+ cellParams.put(ExtractingParams.LOWERNAMES, lowerNames);
+ }
+ String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null);
+ if (defaultField != null) {
+ cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField);
+ }
+ // The XPath expression is kept in a field as well, because doProcess() uses it to filter parser output.
+ xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null);
+ if (xpathExpr != null) {
+ cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
+ }
+
+ // Date formats default to a copy of DateUtil.DEFAULT_DATE_FORMATS when not configured.
+ this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList(DateUtil.DEFAULT_DATE_FORMATS));
+
+ // The content handler factory class defaults to TrimSolrContentHandlerFactory and is loaded by name.
+ String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
+ Class extends SolrContentHandlerFactory> factoryClass;
+ try {
+ factoryClass = (Class extends SolrContentHandlerFactory>)Class.forName(handlerStr);
+ } catch (ClassNotFoundException cnfe) {
+ throw new MorphlineCompilationException("Could not find class "
+ + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe);
+ }
+ this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config);
+
+ this.mediaTypeToParserMap = new HashMap();
+ //MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize()
+
+ // Instantiate every configured parser and register the MIME types it is responsible for.
+ List extends Config> parserConfigs = getConfigs().getConfigList(config, "parsers");
+ for (Config parserConfig : parserConfigs) {
+ String parserClassName = getConfigs().getString(parserConfig, "parser");
+
+ // Reflective instantiation through the no-arg constructor; any failure is reported as a compilation error.
+ Object obj;
+ try {
+ obj = Class.forName(parserClassName).newInstance();
+ } catch (Throwable e) {
+ throw new MorphlineCompilationException("Cannot instantiate Tika parser: " + parserClassName, config, e);
+ }
+ if (!(obj instanceof Parser)) {
+ throw new MorphlineCompilationException("Tika parser " + obj.getClass().getName()
+ + " must be an instance of class " + Parser.class.getName(), config);
+ }
+ Parser parser = (Parser) obj;
+ this.parsers.add(parser);
+
+ // Explicitly configured MIME types for this parser (an empty list is used as the default).
+ List mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.EMPTY_LIST);
+ for (String mediaTypeStr : mediaTypes) {
+ MediaType mediaType = parseMediaType(mediaTypeStr);
+ addSupportedMimeType(mediaTypeStr);
+ this.mediaTypeToParserMap.put(mediaType, parser);
+ }
+
+ // No explicit list configured: use the base types the parser itself reports,
+ // plus any entries listed under additionalSupportedMimeTypes.
+ if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) {
+ for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
+ mediaType = mediaType.getBaseType();
+ addSupportedMimeType(mediaType.toString());
+ this.mediaTypeToParserMap.put(mediaType, parser);
+ }
+ List extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.EMPTY_LIST);
+ for (String mediaTypeStr : extras) {
+ MediaType mediaType = parseMediaType(mediaTypeStr);
+ addSupportedMimeType(mediaTypeStr);
+ this.mediaTypeToParserMap.put(mediaType, parser);
+ }
+ }
+ }
+ //LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap);
+
+ // Convert the collected multimap into the String[]-valued map passed to MultiMapSolrParams.
+ Map tmp = new HashMap();
+ for (Map.Entry> entry : cellParams.asMap().entrySet()) {
+ tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()]));
+ }
+ this.solrParams = new MultiMapSolrParams(tmp);
+ validateArguments();
+ }
+
+ @Override
+ protected boolean doProcess(Record record, InputStream inputStream) {
+ Parser parser = detectParser(record);
+ if (parser == null) {
+ return false;
+ }
+
+ ParseContext parseContext = new ParseContext();
+
+ // necessary for gzipped files or tar files, etc! copied from TikaCLI
+ parseContext.set(Parser.class, parser);
+
+ Metadata metadata = new Metadata();
+ for (Entry entry : record.getFields().entries()) {
+ metadata.add(entry.getKey(), entry.getValue().toString());
+ }
+
+ SolrContentHandler handler = solrContentHandlerFactory.createSolrContentHandler(metadata, solrParams, schema);
+
+ try {
+ inputStream = TikaInputStream.get(inputStream);
+
+ ContentHandler parsingHandler = handler;
+ StringWriter debugWriter = null;
+ if (LOG.isTraceEnabled()) {
+ debugWriter = new StringWriter();
+ ContentHandler serializer = new XMLSerializer(debugWriter, new OutputFormat("XML", "UTF-8", true));
+ parsingHandler = new TeeContentHandler(parsingHandler, serializer);
+ }
+
+ // String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()";
+ if (xpathExpr != null) {
+ Matcher matcher = PARSER.parse(xpathExpr);
+ parsingHandler = new MatchingContentHandler(parsingHandler, matcher);
+ }
+
+ try {
+ parser.parse(inputStream, parsingHandler, metadata, parseContext);
+ } catch (IOException e) {
+ throw new MorphlineRuntimeException("Cannot parse", e);
+ } catch (SAXException e) {
+ throw new MorphlineRuntimeException("Cannot parse", e);
+ } catch (TikaException e) {
+ throw new MorphlineRuntimeException("Cannot parse", e);
+ }
+
+ LOG.trace("debug XML doc: {}", debugWriter);
+ } finally {
+ if (inputStream != null) {
+ Closeables.closeQuietly(inputStream);
+ }
+ }
+
+ SolrInputDocument doc = handler.newDocument();
+ LOG.debug("solr doc: {}", doc);
+ Record outputRecord = toRecord(doc);
+ return getChild().process(outputRecord);
+ }
+
+ /**
+  * Picks the parser for the record's first attachment MIME type: an exact lookup first,
+  * then a scan of the registered types treating "*" as a wildcard. Returns null when the
+  * record carries no MIME type or no registered type matches.
+  */
+ private Parser detectParser(Record record) {
+   if (hasAtLeastOneMimeType(record)) {
+     String mimeTypeName = (String) record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE);
+     assert mimeTypeName != null;
+     MediaType wanted = parseMediaType(mimeTypeName).getBaseType();
+
+     // fast path
+     Parser exactMatch = mediaTypeToParserMap.get(wanted);
+     if (exactMatch != null) {
+       return exactMatch;
+     }
+
+     // wildcard matching
+     for (Map.Entry entry : mediaTypeToParserMap.entrySet()) {
+       if (isMediaTypeMatch(wanted, entry.getKey())) {
+         return entry.getValue();
+       }
+     }
+
+     if (LOG.isDebugEnabled()) {
+       LOG.debug("No supported MIME type parser found for " + Fields.ATTACHMENT_MIME_TYPE + "=" + mimeTypeName);
+     }
+   }
+   return null;
+ }
+
+ private boolean hasAtLeastOneMimeType(Record record) {
+ if (!record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE)) {
+ LOG.debug("Command failed because of missing MIME type for record: {}", record);
+ return false;
+ }
+ return true;
+ }
+
+ /**
+  * Parses a MIME type string and returns its base type (via MediaType.getBaseType()).
+  * The input is trimmed and lower-cased with Locale.ROOT before parsing.
+  *
+  * @param mediaTypeStr the MIME type string to parse
+  * @return the base type of the parsed media type, never null
+  * @throws IllegalArgumentException if mediaTypeStr is null or is not a parseable media type
+  */
+ private MediaType parseMediaType(String mediaTypeStr) {
+   if (mediaTypeStr == null) {
+     throw new IllegalArgumentException("MIME type must not be null");
+   }
+   // MediaType.parse reports an unparseable string by returning null; fail with a
+   // descriptive message instead of a bare NullPointerException on the next line.
+   MediaType mediaType = MediaType.parse(mediaTypeStr.trim().toLowerCase(Locale.ROOT));
+   if (mediaType == null) {
+     throw new IllegalArgumentException("Invalid MIME type: " + mediaTypeStr);
+   }
+   return mediaType.getBaseType();
+ }
+
+ /** Returns true if mediaType falls withing the given range (pattern), false otherwise */
+ private boolean isMediaTypeMatch(MediaType mediaType, MediaType rangePattern) {
+ String WILDCARD = "*";
+ String rangePatternType = rangePattern.getType();
+ String rangePatternSubtype = rangePattern.getSubtype();
+ return (rangePatternType.equals(WILDCARD) || rangePatternType.equals(mediaType.getType()))
+ && (rangePatternSubtype.equals(WILDCARD) || rangePatternSubtype.equals(mediaType.getSubtype()));
+ }
+
+ /**
+ * Creates the content handler factory reflectively by invoking the constructor of
+ * factoryClass that takes a single Collection argument, passing dateFormats to it.
+ *
+ * Throws MorphlineCompilationException if no such constructor exists or if
+ * instantiation fails for any other reason; config is passed along to the exception.
+ */
+ private static SolrContentHandlerFactory getSolrContentHandlerFactory(
+ Class extends SolrContentHandlerFactory> factoryClass, Collection dateFormats, Config config) {
+ try {
+ return factoryClass.getConstructor(Collection.class).newInstance(dateFormats);
+ } catch (NoSuchMethodException nsme) {
+ // Reported separately so the message points at the missing (Collection) constructor.
+ throw new MorphlineCompilationException("Unable to find valid constructor of type "
+ + factoryClass.getName() + " for creating SolrContentHandler", config, nsme);
+ } catch (Exception e) {
+ throw new MorphlineCompilationException("Unexpected exception when trying to create SolrContentHandlerFactory of type "
+ + factoryClass.getName(), config, e);
+ }
+ }
+
+ private Record toRecord(SolrInputDocument doc) {
+ Record record = new Record();
+ for (Entry entry : doc.entrySet()) {
+ record.getFields().putAll(entry.getKey(), entry.getValue().getValues());
+ }
+ return record;
+ }
+
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
new file mode 100644
index 00000000000..81f49afd4e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.cell;
+
+import java.util.Collection;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.handler.extraction.SolrContentHandler;
+import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * {@link SolrContentHandler} and associated factory that strips non-characters and trims on output.
+ * This prevents exceptions on parsing integer fields inside Solr server.
+ */
+public class StripNonCharSolrContentHandlerFactory extends SolrContentHandlerFactory {
+
+  /**
+   * @param dateFormats date formats handed to the superclass and to every handler this factory creates
+   */
+  public StripNonCharSolrContentHandlerFactory(Collection dateFormats) {
+    super(dateFormats);
+  }
+
+  @Override
+  public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
+    return new StripNonCharSolrContentHandler(metadata, params, schema, dateFormats);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  private static final class StripNonCharSolrContentHandler extends SolrContentHandler {
+
+    public StripNonCharSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) {
+      super(metadata, params, schema, dateFormats);
+    }
+
+    /**
+     * Strip all non-characters, which can cause SolrReducer problems if present.
+     * This is borrowed from Apache Nutch.
+     *
+     * The input is walked by code point rather than by char, so that the supplementary-plane
+     * non-characters (U+1FFFE, U+1FFFF, ... U+10FFFF), which are stored as surrogate pairs,
+     * are removed as well. The U+FDD0..U+FDEF block is removed inclusive of both end points.
+     * Control characters up to U+001F are removed except tab, line feed and carriage return.
+     *
+     * @param input the text to clean, must not be null
+     * @return the input without non-characters and without the stripped control characters
+     */
+    private static String stripNonCharCodepoints(String input) {
+      StringBuilder stripped = new StringBuilder(input.length());
+      int i = 0;
+      while (i < input.length()) {
+        int cp = input.codePointAt(i);
+        i += Character.charCount(cp);
+        // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+        // and non-printable control characters except tabulator, new line and carriage return
+        boolean nonCharacter = (cp & 0xfffe) == 0xfffe   // U+nFFFE and U+nFFFF in every plane
+            || (cp >= 0xfdd0 && cp <= 0xfdef);           // U+FDD0 - U+FDEF, inclusive
+        boolean strippedControl = cp <= 0x1f && cp != 0x9 && cp != 0xa && cp != 0xd;
+        if (!nonCharacter && !strippedControl) {
+          stripped.appendCodePoint(cp);
+        }
+      }
+      return stripped.toString();
+    }
+
+    @Override
+    protected String transformValue(String val, SchemaField schemaField) {
+      // Strip first, then trim: removing a leading or trailing non-character can expose
+      // whitespace that would otherwise survive into the output value.
+      String ret = stripNonCharCodepoints(super.transformValue(val, schemaField));
+      return ret.trim();
+    }
+  }
+}
diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
new file mode 100644
index 00000000000..6e7df593ff8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.cell;
+
+import java.util.Collection;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.handler.extraction.SolrContentHandler;
+import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * {@link SolrContentHandler} and associated factory that trims field values on output.
+ * This prevents exceptions on parsing integer fields inside Solr server.
+ */
+public class TrimSolrContentHandlerFactory extends SolrContentHandlerFactory {
+
+ public TrimSolrContentHandlerFactory(Collection dateFormats) {
+ super(dateFormats);
+ }
+
+ @Override
+ public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
+ return new TrimSolrContentHandler(metadata, params, schema, dateFormats);
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ // Nested classes:
+ ///////////////////////////////////////////////////////////////////////////////
+ private static final class TrimSolrContentHandler extends SolrContentHandler {
+
+ public TrimSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) {
+ super(metadata, params, schema, dateFormats);
+ }
+
+ @Override
+ protected String transformValue(String val, SchemaField schemaField) {
+ return super.transformValue(val, schemaField).trim();
+ }
+ }
+}
diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html
new file mode 100644
index 00000000000..9d5daec89bb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Morphlines Solr Cell related code.
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/java/overview.html b/solr/contrib/solr-morphlines-cell/src/java/overview.html
new file mode 100644
index 00000000000..3e25367d302
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/java/overview.html
@@ -0,0 +1,21 @@
+
+
+
+Apache Solr Search Server: Solr Cell Morphline Commands
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the Dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token whose part-of-speech tag exactly matches one defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よã†(ã ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The ãã†ã ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of ãã†ã (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè©ž 引用文å—列 ("noun quotation")
+# is ã„ã‚ã ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ã™ã‚‹, ã , etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person's speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sí | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mí | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | or
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अà¤à¥€
+आदि
+आप
+इतà¥à¤¯à¤¾à¤¦à¤¿
+इन
+इनका
+इनà¥à¤¹à¥€à¤‚
+इनà¥à¤¹à¥‡à¤‚
+इनà¥à¤¹à¥‹à¤‚
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उनà¥à¤¹à¥€à¤‚
+उनà¥à¤¹à¥‡à¤‚
+उनà¥à¤¹à¥‹à¤‚
+उस
+उसके
+उसी
+उसे
+à¤à¤•
+à¤à¤µà¤‚
+à¤à¤¸
+à¤à¤¸à¥‡
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किनà¥à¤¹à¥‡à¤‚
+किनà¥à¤¹à¥‹à¤‚
+किया
+किर
+किस
+किसी
+किसे
+की
+कà¥à¤›
+कà¥à¤²
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाà¤
+जा
+जितना
+जिन
+जिनà¥à¤¹à¥‡à¤‚
+जिनà¥à¤¹à¥‹à¤‚
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥‡à¤‚
+तिनà¥à¤¹à¥‹à¤‚
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दूसरे
+दो
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहà¥à¤¤
+बाद
+बाला
+बिलकà¥à¤²
+à¤à¥€
+à¤à¥€à¤¤à¤°
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाà¤
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लिये
+लेकिन
+व
+वरà¥à¤—
+वह
+वह
+वहाà¤
+वहीं
+वाले
+वà¥à¤¹
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सà¤à¥€
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+से
+सो
+ही
+हà¥à¤†
+हà¥à¤ˆ
+हà¥à¤
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सà¤à¤¿
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अà¤à¤¿
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+à¤à¤¸à¥‡
+रवासा
+कोन
+निचे
+काफि
+उसि
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हà¥à¤‡
+कोनसा
+इसकि
+दà¥à¤¸à¤°à¥‡
+जहां
+अप
+किंहों
+उनकि
+à¤à¤¿
+वरग
+हà¥à¤…
+जेसा
+नहिं
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+այդ
+այլ
+այն
+այս
+դու
+դուք
+եմ
+են
+ենք
+ես
+եք
+է
+էի
+էին
+էինք
+էիր
+էիք
+էր
+ըստ
+թ
+ի
+ին
+իսկ
+իր
+կամ
+համար
+հետ
+հետո
+մենք
+մեջ
+մի
+ն
+նա
+նաև
+նրա
+նրանք
+որ
+որը
+որոնք
+որպես
+ու
+ում
+պիտի
+վրա
+և
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflexive)
+sjøl | self (reflexive)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+å | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houverÃamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+serÃamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tÃnhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+terÃamos
+teriam
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceÅŸti
+aceÅŸtia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aÅŸ
+aÅŸadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deÅŸi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eÅŸti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+niÅŸte
+noastră
+noastre
+noi
+noÅŸtri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+ÅŸi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+Å£i
+Å£ie
+tine
+toată
+toate
+tot
+toţi
+totuÅŸi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voÅŸtri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+Ñ | i
+Ñ | from
+Ñо | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+вÑе | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+Ñ‚Ñ‹ | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+Ð¼ÐµÐ½Ñ | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+еÑли | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибудь | indef. suffix preceded by hyphen
+опÑÑ‚ÑŒ | again
+уж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+ÑÐµÐ±Ñ | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+еÑÑ‚ÑŒ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+мы | we
+Ñ‚ÐµÐ±Ñ | thee
+их | them, their
+чем | than
+была | she was
+Ñам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+Ñебе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifer particle `же'
+тогда | then
+кто | who
+Ñтот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+Ñтого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑŒ | here
+Ñтом | prepositional form of `Ñтот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажетÑÑ | it seems
+ÑÐµÐ¹Ñ‡Ð°Ñ | now
+были | they were
+куда | where to
+зачем | why
+Ñказать | to say
+вÑех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+поÑле | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+Ñти | these
+Ð½Ð°Ñ | us
+про | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+разве | interrogative particle
+Ñказала | she said
+три | three
+Ñту | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впрочем | moreover, besides
+хорошо | good
+Ñвою | ones own, acc. fem. sing.
+Ñтой | oblique form of `Ñта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+Ð½ÐµÐ»ÑŒÐ·Ñ | one must not
+такой | such a one
+им | to them
+более | more
+вÑегда | always
+конечно | of course
+вÑÑŽ | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною]
+ | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее Ñи ею [нее, нÑи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою]
+ |
+ | demonstrative pronouns: Ñтот (this), тот (that)
+ |
+ | Ñтот Ñта Ñто Ñти
+ | Ñтого ÑÑ‚Ñ‹ Ñто Ñти
+ | Ñтого Ñтой Ñтого Ñтих
+ | Ñтому Ñтой Ñтому Ñтим
+ | Ñтим Ñтой Ñтим [Ñтою] Ñтими
+ | Ñтом Ñтой Ñтом Ñтих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑŒ (all)
+ |
+ | веÑÑŒ вÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑŽ вÑе вÑе
+ | вÑего вÑей вÑего вÑех
+ | вÑему вÑей вÑему вÑем
+ | вÑем вÑей вÑем [вÑею] вÑеми
+ | вÑем вÑей вÑем вÑех
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого Ñаму Ñамо Ñамих
+ | Ñамого Ñамой Ñамого Ñамих
+ | Ñамому Ñамой Ñамому Ñамим
+ | Ñамим Ñамой Ñамим [Ñамою] Ñамими
+ | Ñамом Ñамой Ñамом Ñамих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв еÑÑ‚ÑŒ Ñуть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзÑ
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+à¹à¸«à¹ˆà¸‡
+à¹à¸¥à¹‰à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸šà¸š
+à¹à¸•à¹ˆ
+เà¸à¸‡
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่à¸
+เพื่à¸
+เพราะ
+เป็นà¸à¸²à¸£
+เป็น
+เปิดเผย
+เปิด
+เนื่à¸à¸‡à¸ˆà¸²à¸
+เดียวà¸à¸±à¸™
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+à¸à¸µà¸
+à¸à¸²à¸ˆ
+à¸à¸°à¹„ร
+à¸à¸à¸
+à¸à¸¢à¹ˆà¸²à¸‡
+à¸à¸¢à¸¹à¹ˆ
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลังจาà¸
+หลัง
+หรืà¸
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สà¹à¸²à¸«à¸£à¸±à¸š
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาà¸
+มา
+พร้à¸à¸¡
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นà¹à¸²
+นั้น
+นัà¸
+นà¸à¸à¸ˆà¸²à¸
+ทุà¸
+ที่สุด
+ที่
+ทà¹à¸²à¹ƒà¸«à¹‰
+ทà¹à¸²
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูà¸
+ถึง
+ต้à¸à¸‡
+ต่างๆ
+ต่าง
+ต่à¸
+ตาม
+ตั้งà¹à¸•à¹ˆ
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาà¸
+จัด
+จะ
+คืà¸
+ความ
+ครั้ง
+คง
+ขึ้น
+ขà¸à¸‡
+ขà¸
+ขณะ
+à¸à¹ˆà¸à¸™
+à¸à¹‡
+à¸à¸²à¸£
+à¸à¸±à¸š
+à¸à¸±à¸™
+à¸à¸§à¹ˆà¸²
+à¸à¸¥à¹ˆà¸²à¸§
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beÅŸ
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birÅŸey
+birÅŸeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+deÄŸil
+diÄŸer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eÄŸer
+elli
+en
+etmesi
+etti
+ettiÄŸi
+ettiÄŸini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+iÅŸte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduÄŸu
+olduÄŸunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+raÄŸmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+ÅŸey
+ÅŸeyden
+ÅŸeyi
+ÅŸeyler
+şöyle
+ÅŸu
+ÅŸuna
+ÅŸunda
+ÅŸundan
+şunları
+ÅŸunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiÅŸ
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same text (first CSV field) is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž
+
+# Custom segmentation for compound katakana
+トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+
+# Custom reading for former sumo wrestler
+æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml
new file mode 100644
index 00000000000..ae2c56d18ae
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml
@@ -0,0 +1,947 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml
new file mode 100644
index 00000000000..9d9178746cf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -0,0 +1,1764 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ textSpell
+
+
+
+
+
+ default
+ name
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt
new file mode 100644
index 00000000000..ae1e83eeb3d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt
new file mode 100644
index 00000000000..7f72128303b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token whose part-of-speech tag exactly matches one defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よã†(ã ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The ãã†ã ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of ãã†ã (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè©ž 引用文å—列 ("noun quotation")
+# is ã„ã‚ã ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ã™ã‚‹, ã , etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person's speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sà | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mà | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+وگو
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+و
+دو
+نخستين
+ولي
+چرا
+چه
+وسط
+ه
+كدام
+قابل
+يك
+رفت
+هفت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرفته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+حق
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرفت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+فقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استفاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رفته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+گفت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+حدود
+مختلف
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تحت
+ضمن
+هستيم
+گفته
+فكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+حتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطفا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+فوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | me (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | or
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà  | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अभी
+आदि
+आप
+इत्यादि
+इन
+इनका
+इन्हीं
+इन्हें
+इन्हों
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उन्हीं
+उन्हें
+उन्हों
+उस
+उसके
+उसी
+उसे
+एक
+एवं
+एस
+ऐसे
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किन्हें
+किन्हों
+किया
+किर
+किस
+किसी
+किसे
+की
+कुछ
+कुल
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाँ
+जा
+जितना
+जिन
+जिन्हें
+जिन्हों
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिन्हें
+तिन्हों
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दुसरा
+दूसरे
+दो
+द्वारा
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहुत
+बाद
+बाला
+बिलकुल
+भी
+भीतर
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाँ
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱ्वासा
+लिए
+लिये
+लेकिन
+व
+वर्ग
+वह
+वह
+वहाँ
+वहीं
+वाले
+वुह
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सभी
+साथ
+साबुत
+साभ
+सारा
+से
+सो
+ही
+हुआ
+हुई
+हुए
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सभि
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अभि
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+एसे
+रवासा
+कोन
+निचे
+काफि
+उसि
+पुरा
+भितर
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हुइ
+कोनसा
+इसकि
+दुसरे
+जहां
+अप
+किंहों
+उनकि
+भि
+वरग
+हुअ
+जेसा
+नहिं
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+այդ
+այլ
+այն
+այս
+դու
+դուք
+եմ
+են
+ենք
+ես
+եք
+է
+էի
+էին
+էինք
+էիր
+էիք
+էր
+ըստ
+թ
+ի
+ին
+իսկ
+իր
+կամ
+համար
+հետ
+հետո
+մենք
+մեջ
+մի
+ն
+նա
+նաև
+նրա
+նրանք
+որ
+որը
+որոնք
+որպես
+ու
+ում
+պիտի
+վրա
+և
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrÃ
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarÃ
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farÃ
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starÃ
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard , Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflexive)
+sjøl | self (reflexive)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+Ã¥ | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+Ã | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+Ã s | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houverÃamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+serÃamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tÃnhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+terÃamos
+teriam
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceÅŸti
+aceÅŸtia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aÅŸ
+aÅŸadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deÅŸi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eÅŸti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+niÅŸte
+noastră
+noastre
+noi
+noÅŸtri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+ÅŸi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+Å£i
+Å£ie
+tine
+toată
+toate
+tot
+toţi
+totuÅŸi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voÅŸtri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+Ñ | i
+Ñ | from
+Ñо | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+вÑе | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+Ñ‚Ñ‹ | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+Ð¼ÐµÐ½Ñ | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+еÑли | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибудь | indef. suffix preceded by hyphen
+опÑÑ‚ÑŒ | again
+уж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+ÑÐµÐ±Ñ | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+еÑÑ‚ÑŒ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+мы | we
+Ñ‚ÐµÐ±Ñ | thee
+их | them, their
+чем | than
+была | she was
+Ñам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+Ñебе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifer particle `же'
+тогда | then
+кто | who
+Ñтот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+Ñтого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑŒ | here
+Ñтом | prepositional form of `Ñтот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажетÑÑ | it seems
+ÑÐµÐ¹Ñ‡Ð°Ñ | now
+были | they were
+куда | where to
+зачем | why
+Ñказать | to say
+вÑех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+поÑле | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+Ñти | these
+Ð½Ð°Ñ | us
+про | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+разве | interrogative particle
+Ñказала | she said
+три | three
+Ñту | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впрочем | moreover, besides
+хорошо | good
+Ñвою | ones own, acc. fem. sing.
+Ñтой | oblique form of `Ñта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+Ð½ÐµÐ»ÑŒÐ·Ñ | one must not
+такой | such a one
+им | to them
+более | more
+вÑегда | always
+конечно | of course
+вÑÑŽ | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною]
+ | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее Ñи ею [нее, нÑи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою]
+ |
+ | demonstrative pronouns: Ñтот (this), тот (that)
+ |
+ | Ñтот Ñта Ñто Ñти
+ | Ñтого ÑÑ‚Ñ‹ Ñто Ñти
+ | Ñтого Ñтой Ñтого Ñтих
+ | Ñтому Ñтой Ñтому Ñтим
+ | Ñтим Ñтой Ñтим [Ñтою] Ñтими
+ | Ñтом Ñтой Ñтом Ñтих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑŒ (all)
+ |
+ | веÑÑŒ вÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑŽ вÑе вÑе
+ | вÑего вÑей вÑего вÑех
+ | вÑему вÑей вÑему вÑем
+ | вÑем вÑей вÑем [вÑею] вÑеми
+ | вÑем вÑей вÑем вÑех
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого Ñаму Ñамо Ñамих
+ | Ñамого Ñамой Ñамого Ñамих
+ | Ñамому Ñамой Ñамому Ñамим
+ | Ñамим Ñамой Ñамим [Ñамою] Ñамими
+ | Ñамом Ñамой Ñамом Ñамих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв еÑÑ‚ÑŒ Ñуть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзÑ
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | should, would
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+Ã¥t | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+à¹à¸«à¹ˆà¸‡
+à¹à¸¥à¹‰à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸šà¸š
+à¹à¸•à¹ˆ
+เà¸à¸‡
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่à¸
+เพื่à¸
+เพราะ
+เป็นà¸à¸²à¸£
+เป็น
+เปิดเผย
+เปิด
+เนื่à¸à¸‡à¸ˆà¸²à¸
+เดียวà¸à¸±à¸™
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+à¸à¸µà¸
+à¸à¸²à¸ˆ
+à¸à¸°à¹„ร
+à¸à¸à¸
+à¸à¸¢à¹ˆà¸²à¸‡
+à¸à¸¢à¸¹à¹ˆ
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลังจาà¸
+หลัง
+หรืà¸
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สà¹à¸²à¸«à¸£à¸±à¸š
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาà¸
+มา
+พร้à¸à¸¡
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นà¹à¸²
+นั้น
+นัà¸
+นà¸à¸à¸ˆà¸²à¸
+ทุà¸
+ที่สุด
+ที่
+ทà¹à¸²à¹ƒà¸«à¹‰
+ทà¹à¸²
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูà¸
+ถึง
+ต้à¸à¸‡
+ต่างๆ
+ต่าง
+ต่à¸
+ตาม
+ตั้งà¹à¸•à¹ˆ
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาà¸
+จัด
+จะ
+คืà¸
+ความ
+ครั้ง
+คง
+ขึ้น
+ขà¸à¸‡
+ขà¸
+ขณะ
+à¸à¹ˆà¸à¸™
+à¸à¹‡
+à¸à¸²à¸£
+à¸à¸±à¸š
+à¸à¸±à¸™
+à¸à¸§à¹ˆà¸²
+à¸à¸¥à¹ˆà¸²à¸§
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml
new file mode 100644
index 00000000000..65192efe442
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml
@@ -0,0 +1,961 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml
new file mode 100644
index 00000000000..beff1b2af0a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml
@@ -0,0 +1,1784 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+ ${solr.hdfs.home:}
+ ${solr.hdfs.confdir:}
+ ${solr.hdfs.blockcache.enabled:true}
+ ${solr.hdfs.blockcache.slab.count:1}
+ ${solr.hdfs.blockcache.direct.memory.allocation:true}
+ ${solr.hdfs.blockcache.blocksperbank:16384}
+ ${solr.hdfs.blockcache.read.enabled:true}
+ ${solr.hdfs.blockcache.write.enabled:true}
+ ${solr.hdfs.nrtcachingdirectory.enable:true}
+ ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
+ ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.lock.type:hdfs}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text_general
+
+
+
+
+
+ default
+ text
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+ false
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt
new file mode 100644
index 00000000000..ae1e83eeb3d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt
new file mode 100644
index 00000000000..7f72128303b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml
new file mode 100644
index 00000000000..6c8b43f75ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches one of those defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よã†(ã ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The ãã†ã ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of ãã†ã (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè©ž 引用文å—列 ("noun quotation")
+# is ã„ã‚ã ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ã™ã‚‹, ã , etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person’s speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+Ï€Ïοσ
+με
+σε
+ωσ
+παÏα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sà | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mà | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mÃo | mine
+mÃa |
+mÃos |
+mÃas |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estarÃa
+estarÃas
+estarÃamos
+estarÃais
+estarÃan
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habrÃa
+habrÃas
+habrÃamos
+habrÃais
+habrÃan
+habÃa
+habÃas
+habÃamos
+habÃais
+habÃan
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+serÃa
+serÃas
+serÃamos
+serÃais
+serÃan
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendrÃa
+tendrÃas
+tendrÃamos
+tendrÃais
+tendrÃan
+tenÃa
+tenÃas
+tenÃamos
+tenÃais
+tenÃan
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+ÙˆÚ¯Ùˆ
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+Ùˆ
+دو
+نخستين
+ولي
+چرا
+Ú†Ù‡
+وسط
+Ù‡
+كدام
+قابل
+يك
+رÙت
+Ù‡Ùت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرÙته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+ØÙ‚
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرÙت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+Ùقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استÙاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رÙته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+Ú¯Ùت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+Øدود
+مختلÙ
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تØت
+ضمن
+هستيم
+Ú¯Ùته
+Ùكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+Øتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطÙا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+Ùوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | me (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | where
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtÃ
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+nÃ
+nÃor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sÃ
+tar
+thar
+thú
+triúr
+trÃ
+trÃna
+trÃnár
+trÃocha
+tú
+um
+ár
+é
+éis
+Ã
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aÃnda
+alÃ
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquÃ
+ao
+aos
+as
+asÃ
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+habÃa
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अà¤à¥€
+आदि
+आप
+इतà¥à¤¯à¤¾à¤¦à¤¿
+इन
+इनका
+इनà¥à¤¹à¥€à¤‚
+इनà¥à¤¹à¥‡à¤‚
+इनà¥à¤¹à¥‹à¤‚
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उनà¥à¤¹à¥€à¤‚
+उनà¥à¤¹à¥‡à¤‚
+उनà¥à¤¹à¥‹à¤‚
+उस
+उसके
+उसी
+उसे
+à¤à¤•
+à¤à¤µà¤‚
+à¤à¤¸
+à¤à¤¸à¥‡
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किनà¥à¤¹à¥‡à¤‚
+किनà¥à¤¹à¥‹à¤‚
+किया
+किर
+किस
+किसी
+किसे
+की
+कà¥à¤›
+कà¥à¤²
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाà¤
+जा
+जितना
+जिन
+जिनà¥à¤¹à¥‡à¤‚
+जिनà¥à¤¹à¥‹à¤‚
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥‡à¤‚
+तिनà¥à¤¹à¥‹à¤‚
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दूसरे
+दो
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहà¥à¤¤
+बाद
+बाला
+बिलकà¥à¤²
+à¤à¥€
+à¤à¥€à¤¤à¤°
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाà¤
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लिये
+लेकिन
+व
+वरà¥à¤—
+वह
+वह
+वहाà¤
+वहीं
+वाले
+वà¥à¤¹
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सà¤à¥€
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+से
+सो
+ही
+हà¥à¤†
+हà¥à¤ˆ
+हà¥à¤
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सà¤à¤¿
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अà¤à¤¿
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+à¤à¤¸à¥‡
+रवासा
+कोन
+निचे
+काफि
+उसि
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हà¥à¤‡
+कोनसा
+इसकि
+दà¥à¤¸à¤°à¥‡
+जहां
+अप
+किंहों
+उनकि
+à¤à¤¿
+वरग
+हà¥à¤…
+जेसा
+नहिं
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+Å‘
+Å‘k
+Å‘ket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö‚
+Õ¤Õ¸Ö‚Ö„
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö„
+Õ¥Õ½
+Õ¥Ö„
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö„
+Õ§Õ«Ö€
+Õ§Õ«Ö„
+Õ§Ö€
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö€
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö€
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö„
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö‡
+Õ¶Ö€Õ¡
+Õ¶Ö€Õ¡Õ¶Ö„
+Õ¸Ö€
+Õ¸Ö€Õ¨
+Õ¸Ö€Õ¸Õ¶Ö„
+Õ¸Ö€ÕºÕ¥Õ½
+Õ¸Ö‚
+Õ¸Ö‚Õ´
+ÕºÕ«Õ¿Õ«
+Õ¾Ö€Õ¡
+Ö‡
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl           |  da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel            |  in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door           | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren          | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard , Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflective)
+sjøl | self (reflective)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+Ã¥ | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about*
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+               | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à              |  a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+               | há from HAV
+nos | em + os; us
+já | already, now
+               | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela            |  she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+               | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às             |  a + as
+minha | my
+               | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+               | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceÅŸti
+aceÅŸtia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aÅŸ
+aÅŸadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deÅŸi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eÅŸti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+niÅŸte
+noastră
+noastre
+noi
+noÅŸtri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+ÅŸi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+Å£i
+Å£ie
+tine
+toată
+toate
+tot
+toţi
+totuÅŸi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voÅŸtri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+я | i
+с | from
+со | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+вÑе | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+Ñ‚Ñ‹ | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+Ð¼ÐµÐ½Ñ | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+еÑли | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибудь | indef. suffix preceded by hyphen
+опÑÑ‚ÑŒ | again
+уж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+ÑÐµÐ±Ñ | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+еÑÑ‚ÑŒ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+мы | we
+Ñ‚ÐµÐ±Ñ | thee
+их | them, their
+чем | than
+была | she was
+Ñам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+Ñебе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifier particle `же'
+тогда | then
+кто | who
+Ñтот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+Ñтого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑŒ | here
+Ñтом | prepositional form of `Ñтот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажетÑÑ | it seems
+ÑÐµÐ¹Ñ‡Ð°Ñ | now
+были | they were
+куда | where to
+зачем | why
+Ñказать | to say
+вÑех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+поÑле | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+Ñти | these
+Ð½Ð°Ñ | us
+про | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+разве | interrogative particle
+Ñказала | she said
+три | three
+Ñту | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впрочем | moreover, besides
+хорошо | good
+Ñвою | ones own, acc. fem. sing.
+Ñтой | oblique form of `Ñта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+Ð½ÐµÐ»ÑŒÐ·Ñ | one must not
+такой | such a one
+им | to them
+более | more
+вÑегда | always
+конечно | of course
+вÑÑŽ | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною]
+ | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее Ñи ею [нее, нÑи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою]
+ |
+ | demonstrative pronouns: Ñтот (this), тот (that)
+ |
+ | Ñтот Ñта Ñто Ñти
+ | Ñтого ÑÑ‚Ñ‹ Ñто Ñти
+ | Ñтого Ñтой Ñтого Ñтих
+ | Ñтому Ñтой Ñтому Ñтим
+ | Ñтим Ñтой Ñтим [Ñтою] Ñтими
+ | Ñтом Ñтой Ñтом Ñтих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑŒ (all)
+ |
+ | веÑÑŒ вÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑŽ вÑе вÑе
+ | вÑего вÑей вÑего вÑех
+ | вÑему вÑей вÑему вÑем
+ | вÑем вÑей вÑем [вÑею] вÑеми
+ | вÑем вÑей вÑем вÑех
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого Ñаму Ñамо Ñамих
+ | Ñамого Ñамой Ñамого Ñамих
+ | Ñамому Ñамой Ñамому Ñамим
+ | Ñамим Ñамой Ñамим [Ñамою] Ñамими
+ | Ñамом Ñамой Ñамом Ñамих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв еÑÑ‚ÑŒ Ñуть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзÑ
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+à¹à¸«à¹ˆà¸‡
+à¹à¸¥à¹‰à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸šà¸š
+à¹à¸•à¹ˆ
+เà¸à¸‡
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่à¸
+เพื่à¸
+เพราะ
+เป็นà¸à¸²à¸£
+เป็น
+เปิดเผย
+เปิด
+เนื่à¸à¸‡à¸ˆà¸²à¸
+เดียวà¸à¸±à¸™
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+à¸à¸µà¸
+à¸à¸²à¸ˆ
+à¸à¸°à¹„ร
+à¸à¸à¸
+à¸à¸¢à¹ˆà¸²à¸‡
+à¸à¸¢à¸¹à¹ˆ
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลังจาà¸
+หลัง
+หรืà¸
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สà¹à¸²à¸«à¸£à¸±à¸š
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาà¸
+มา
+พร้à¸à¸¡
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นà¹à¸²
+นั้น
+นัà¸
+นà¸à¸à¸ˆà¸²à¸
+ทุà¸
+ที่สุด
+ที่
+ทà¹à¸²à¹ƒà¸«à¹‰
+ทà¹à¸²
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูà¸
+ถึง
+ต้à¸à¸‡
+ต่างๆ
+ต่าง
+ต่à¸
+ตาม
+ตั้งà¹à¸•à¹ˆ
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาà¸
+จัด
+จะ
+คืà¸
+ความ
+ครั้ง
+คง
+ขึ้น
+ขà¸à¸‡
+ขà¸
+ขณะ
+à¸à¹ˆà¸à¸™
+à¸à¹‡
+à¸à¸²à¸£
+à¸à¸±à¸š
+à¸à¸±à¸™
+à¸à¸§à¹ˆà¸²
+à¸à¸¥à¹ˆà¸²à¸§
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beÅŸ
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birÅŸey
+birÅŸeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+deÄŸil
+diÄŸer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eÄŸer
+elli
+en
+etmesi
+etti
+ettiÄŸi
+ettiÄŸini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+iÅŸte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduÄŸu
+olduÄŸunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+raÄŸmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+ÅŸey
+ÅŸeyden
+ÅŸeyi
+ÅŸeyler
+şöyle
+ÅŸu
+ÅŸuna
+ÅŸunda
+ÅŸundan
+şunları
+ÅŸunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiÅŸ
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by design,
+# in order to maximize ease of use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same text is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž
+
+# Custom segmentation for compound katakana
+トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+
+# Custom reading for former sumo wrestler
+æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml
new file mode 100644
index 00000000000..b133c135f31
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml
@@ -0,0 +1,961 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml
new file mode 100644
index 00000000000..f9683b27db7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml
@@ -0,0 +1,1789 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+ ${solr.hdfs.home:}
+ ${solr.hdfs.confdir:}
+ ${solr.hdfs.security.kerberos.enabled:false}
+ ${solr.hdfs.security.kerberos.keytabfile:}
+ ${solr.hdfs.security.kerberos.principal:}
+ ${solr.hdfs.blockcache.enabled:true}
+ ${solr.hdfs.blockcache.slab.count:1}
+ ${solr.hdfs.blockcache.direct.memory.allocation:true}
+ ${solr.hdfs.blockcache.blocksperbank:16384}
+ ${solr.hdfs.blockcache.read.enabled:true}
+ ${solr.hdfs.blockcache.write.enabled:true}
+ ${solr.hdfs.nrtcachingdirectory.enable:true}
+ ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
+ ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.lock.type:hdfs}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text_general
+
+
+
+
+
+ default
+ text
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+ false
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt
new file mode 100644
index 00000000000..ae1e83eeb3d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt
new file mode 100644
index 00000000000..7f72128303b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml
new file mode 100644
index 00000000000..6c8b43f75ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml
new file mode 100644
index 00000000000..4604f60476f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches those defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よã†(ã ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The ãã†ã ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of ãã†ã (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè©ž 引用文å—列 ("noun quotation")
+# is ã„ã‚ã ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ã™ã‚‹, ã , etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person’s speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+Ï€Ïοσ
+με
+σε
+ωσ
+παÏα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sà | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | habÃa from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mà | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mÃo | mine
+mÃa |
+mÃos |
+mÃas |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estarÃa
+estarÃas
+estarÃamos
+estarÃais
+estarÃan
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habrÃa
+habrÃas
+habrÃamos
+habrÃais
+habrÃan
+habÃa
+habÃas
+habÃamos
+habÃais
+habÃan
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+serÃa
+serÃas
+serÃamos
+serÃais
+serÃan
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendrÃa
+tendrÃas
+tendrÃamos
+tendrÃais
+tendrÃan
+tenÃa
+tenÃas
+tenÃamos
+tenÃais
+tenÃan
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of Basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+ÙˆÚ¯Ùˆ
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+Ùˆ
+دو
+نخستين
+ولي
+چرا
+Ú†Ù‡
+وسط
+Ù‡
+كدام
+قابل
+يك
+رÙت
+Ù‡Ùت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرÙته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+ØÙ‚
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرÙت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+Ùقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استÙاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رÙته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+Ú¯Ùت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+Øدود
+مختلÙ
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تØت
+ضمن
+هستيم
+Ú¯Ùته
+Ùكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+Øتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطÙا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+Ùوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | where
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+Ã | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà  | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtÃ
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+nÃ
+nÃor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sÃ
+tar
+thar
+thú
+triúr
+trÃ
+trÃna
+trÃnár
+trÃocha
+tú
+um
+ár
+é
+éis
+Ã
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aÃnda
+alÃ
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquÃ
+ao
+aos
+as
+asÃ
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+habÃa
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अà¤à¥€
+आदि
+आप
+इतà¥à¤¯à¤¾à¤¦à¤¿
+इन
+इनका
+इनà¥à¤¹à¥€à¤‚
+इनà¥à¤¹à¥‡à¤‚
+इनà¥à¤¹à¥‹à¤‚
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उनà¥à¤¹à¥€à¤‚
+उनà¥à¤¹à¥‡à¤‚
+उनà¥à¤¹à¥‹à¤‚
+उस
+उसके
+उसी
+उसे
+à¤à¤•
+à¤à¤µà¤‚
+à¤à¤¸
+à¤à¤¸à¥‡
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किनà¥à¤¹à¥‡à¤‚
+किनà¥à¤¹à¥‹à¤‚
+किया
+किर
+किस
+किसी
+किसे
+की
+कà¥à¤›
+कà¥à¤²
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाà¤
+जा
+जितना
+जिन
+जिनà¥à¤¹à¥‡à¤‚
+जिनà¥à¤¹à¥‹à¤‚
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥‡à¤‚
+तिनà¥à¤¹à¥‹à¤‚
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दूसरे
+दो
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहà¥à¤¤
+बाद
+बाला
+बिलकà¥à¤²
+à¤à¥€
+à¤à¥€à¤¤à¤°
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाà¤
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लिये
+लेकिन
+व
+वरà¥à¤—
+वह
+वह
+वहाà¤
+वहीं
+वाले
+वà¥à¤¹
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सà¤à¥€
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+से
+सो
+ही
+हà¥à¤†
+हà¥à¤ˆ
+हà¥à¤
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सà¤à¤¿
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अà¤à¤¿
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+à¤à¤¸à¥‡
+रवासा
+कोन
+निचे
+काफि
+उसि
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हà¥à¤‡
+कोनसा
+इसकि
+दà¥à¤¸à¤°à¥‡
+जहां
+अप
+किंहों
+उनकि
+à¤à¤¿
+वरग
+हà¥à¤…
+जेसा
+नहिं
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amÃg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+Ãgy
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kÃvül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+mÃg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+Å‘
+Å‘k
+Å‘ket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö‚
+Õ¤Õ¸Ö‚Ö„
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö„
+Õ¥Õ½
+Õ¥Ö„
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö„
+Õ§Õ«Ö€
+Õ§Õ«Ö„
+Õ§Ö€
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö€
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö€
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö„
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö‡
+Õ¶Ö€Õ¡
+Õ¶Ö€Õ¡Õ¶Ö„
+Õ¸Ö€
+Õ¸Ö€Õ¨
+Õ¸Ö€Õ¸Õ¶Ö„
+Õ¸Ö€ÕºÕ¥Õ½
+Õ¸Ö‚
+Õ¸Ö‚Õ´
+ÕºÕ«Õ¿Õ«
+Õ¾Ö€Õ¡
+Ö‡
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrÃ
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarÃ
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farÃ
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starÃ
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard , Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | oneself (reflexive)
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflexive)
+sjøl | self (reflexive)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+Ã¥ | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about*
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+Ã | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+Ã s | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houverÃamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+serÃamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tÃnhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+terÃamos
+teriam
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceÅŸti
+aceÅŸtia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aÅŸ
+aÅŸadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deÅŸi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eÅŸti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+niÅŸte
+noastră
+noastre
+noi
+noÅŸtri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+ÅŸi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+Å£i
+Å£ie
+tine
+toată
+toate
+tot
+toţi
+totuÅŸi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voÅŸtri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+Ñ | i
+Ñ | from
+Ñо | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+вÑе | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+Ñ‚Ñ‹ | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+Ð¼ÐµÐ½Ñ | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+еÑли | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибудь | indef. suffix preceded by hyphen
+опÑÑ‚ÑŒ | again
+уж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+ÑÐµÐ±Ñ | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+еÑÑ‚ÑŒ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+мы | we
+Ñ‚ÐµÐ±Ñ | thee
+их | them, their
+чем | than
+была | she was
+Ñам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+Ñебе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifer particle `же'
+тогда | then
+кто | who
+Ñтот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+Ñтого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑŒ | here
+Ñтом | prepositional form of `Ñтот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажетÑÑ | it seems
+ÑÐµÐ¹Ñ‡Ð°Ñ | now
+были | they were
+куда | where to
+зачем | why
+Ñказать | to say
+вÑех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+поÑле | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+Ñти | these
+Ð½Ð°Ñ | us
+про | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+разве | interrogative particle
+Ñказала | she said
+три | three
+Ñту | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впрочем | moreover, besides
+хорошо | good
+Ñвою | ones own, acc. fem. sing.
+Ñтой | oblique form of `Ñта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+Ð½ÐµÐ»ÑŒÐ·Ñ | one must not
+такой | such a one
+им | to them
+более | more
+вÑегда | always
+конечно | of course
+вÑÑŽ | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною]
+ | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее Ñи ею [нее, нÑи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою]
+ |
+ | demonstrative pronouns: Ñтот (this), тот (that)
+ |
+ | Ñтот Ñта Ñто Ñти
+ | Ñтого ÑÑ‚Ñ‹ Ñто Ñти
+ | Ñтого Ñтой Ñтого Ñтих
+ | Ñтому Ñтой Ñтому Ñтим
+ | Ñтим Ñтой Ñтим [Ñтою] Ñтими
+ | Ñтом Ñтой Ñтом Ñтих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑŒ (all)
+ |
+ | веÑÑŒ вÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑŽ вÑе вÑе
+ | вÑего вÑей вÑего вÑех
+ | вÑему вÑей вÑему вÑем
+ | вÑем вÑей вÑем [вÑею] вÑеми
+ | вÑем вÑей вÑем вÑех
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого Ñаму Ñамо Ñамих
+ | Ñамого Ñамой Ñамого Ñамих
+ | Ñамому Ñамой Ñамому Ñамим
+ | Ñамим Ñамой Ñамим [Ñамою] Ñамими
+ | Ñамом Ñамой Ñамом Ñамих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв еÑÑ‚ÑŒ Ñуть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзÑ
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+Ã¥t | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+à¹à¸«à¹ˆà¸‡
+à¹à¸¥à¹‰à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸šà¸š
+à¹à¸•à¹ˆ
+เà¸à¸‡
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่à¸
+เพื่à¸
+เพราะ
+เป็นà¸à¸²à¸£
+เป็น
+เปิดเผย
+เปิด
+เนื่à¸à¸‡à¸ˆà¸²à¸
+เดียวà¸à¸±à¸™
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+à¸à¸µà¸
+à¸à¸²à¸ˆ
+à¸à¸°à¹„ร
+à¸à¸à¸
+à¸à¸¢à¹ˆà¸²à¸‡
+à¸à¸¢à¸¹à¹ˆ
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลังจาà¸
+หลัง
+หรืà¸
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สà¹à¸²à¸«à¸£à¸±à¸š
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาà¸
+มา
+พร้à¸à¸¡
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นà¹à¸²
+นั้น
+นัà¸
+นà¸à¸à¸ˆà¸²à¸
+ทุà¸
+ที่สุด
+ที่
+ทà¹à¸²à¹ƒà¸«à¹‰
+ทà¹à¸²
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูà¸
+ถึง
+ต้à¸à¸‡
+ต่างๆ
+ต่าง
+ต่à¸
+ตาม
+ตั้งà¹à¸•à¹ˆ
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาà¸
+จัด
+จะ
+คืà¸
+ความ
+ครั้ง
+คง
+ขึ้น
+ขà¸à¸‡
+ขà¸
+ขณะ
+à¸à¹ˆà¸à¸™
+à¸à¹‡
+à¸à¸²à¸£
+à¸à¸±à¸š
+à¸à¸±à¸™
+à¸à¸§à¹ˆà¸²
+à¸à¸¥à¹ˆà¸²à¸§
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beÅŸ
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birÅŸey
+birÅŸeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+deÄŸil
+diÄŸer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eÄŸer
+elli
+en
+etmesi
+etti
+ettiÄŸi
+ettiÄŸini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+iÅŸte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduÄŸu
+olduÄŸunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+raÄŸmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+ÅŸey
+ÅŸeyden
+ÅŸeyi
+ÅŸeyler
+şöyle
+ÅŸu
+ÅŸuna
+ÅŸunda
+ÅŸundan
+şunları
+ÅŸunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiÅŸ
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same text (first field) is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž
+
+# Custom segmentation for compound katakana
+トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+
+# Custom reading for former sumo wrestler
+æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml
new file mode 100644
index 00000000000..83080dfa40c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml
@@ -0,0 +1,914 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml
new file mode 100644
index 00000000000..9d9178746cf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml
@@ -0,0 +1,1764 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ textSpell
+
+
+
+
+
+ default
+ name
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt
new file mode 100644
index 00000000000..ae1e83eeb3d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt
new file mode 100644
index 00000000000..7f72128303b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml
new file mode 100644
index 00000000000..a37ab12ecfe
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml
@@ -0,0 +1,1787 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+ ${solr.hdfs.home:}
+ ${solr.hdfs.confdir:}
+ ${solr.hdfs.security.kerberos.enabled:false}
+ ${solr.hdfs.security.kerberos.keytabfile:}
+ ${solr.hdfs.security.kerberos.principal:}
+ ${solr.hdfs.blockcache.enabled:true}
+ ${solr.hdfs.blockcache.slab.count:1}
+ ${solr.hdfs.blockcache.direct.memory.allocation:true}
+ ${solr.hdfs.blockcache.blocksperbank:16384}
+ ${solr.hdfs.blockcache.read.enabled:true}
+ ${solr.hdfs.blockcache.write.enabled:true}
+ ${solr.hdfs.nrtcachingdirectory.enable:true}
+ ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
+ ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.lock.type:hdfs}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text_general
+
+
+
+
+
+ default
+ text
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+ false
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
new file mode 100644
index 00000000000..80d2d43499c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.cell;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.common.util.DateUtil;
+import org.apache.solr.handler.extraction.SolrContentHandler;
+import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
+
+  /**
+   * Counts registered per test document path in {@link #setUp()} and handed to
+   * {@code testDocumentTypesInternal} by every document-type test. Presumably the number of
+   * records each file is expected to yield -- confirm against the base class.
+   */
+  private Map<String, Integer> expectedRecords = new HashMap<String, Integer>();
+
+
+  /**
+   * Runs the base-class set-up, registers the per-file counts and copies
+   * {@code custom-mimetypes.xml} from the test resources into the temp directory.
+   */
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    String path = RESOURCES_DIR + "/test-documents";
+    expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2);
+    expectedRecords.put(path + "/sample-statuses-20120906-141433", 2);
+    expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2);
+    expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2);
+    expectedRecords.put(path + "/cars.csv", 5);
+    expectedRecords.put(path + "/cars.csv.gz", 5);
+    expectedRecords.put(path + "/cars.tar.gz", 4);
+    expectedRecords.put(path + "/cars.tsv", 5);
+    expectedRecords.put(path + "/cars.ssv", 5);
+    expectedRecords.put(path + "/test-documents.7z", 9);
+    expectedRecords.put(path + "/test-documents.cpio", 9);
+    expectedRecords.put(path + "/test-documents.tar", 9);
+    expectedRecords.put(path + "/test-documents.tbz2", 9);
+    expectedRecords.put(path + "/test-documents.tgz", 9);
+    expectedRecords.put(path + "/test-documents.zip", 9);
+    expectedRecords.put(path + "/multiline-stacktrace.log", 4);
+
+    FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
+  }
+
+  /** Feeds the plain, gzipped and tar.gz JPEG fixtures through the solrCellJPGCompressed morphline. */
+  @Test
+  public void testSolrCellJPGCompressed() throws Exception {
+
+    morphline = createMorphline("test-morphlines/solrCellJPGCompressed");
+    String path = RESOURCES_DIR + "/test-documents";
+    String[] files = new String[] {
+        path + "/testJPEG_EXIF.jpg",
+        path + "/testJPEG_EXIF.jpg.gz",
+        path + "/testJPEG_EXIF.jpg.tar.gz",
+        //path + "/jpeg2000.jp2",
+    };
+    testDocumentTypesInternal(files, expectedRecords);
+  }
+
+  /** Feeds a single XML fixture through the solrCellXML morphline. */
+  @Test
+  public void testSolrCellXML() throws Exception {
+    morphline = createMorphline("test-morphlines/solrCellXML");
+    String path = RESOURCES_DIR + "/test-documents";
+    String[] files = new String[] {
+        path + "/testXML2.xml",
+    };
+    testDocumentTypesInternal(files, expectedRecords);
+  }
+
+  /**
+   * Feeds text, office, PDF, JPEG, XML and avro fixtures through the solrCellDocumentTypes
+   * morphline. The commented-out entries are fixtures that are currently not exercised.
+   */
+  @Test
+  public void testSolrCellDocumentTypes() throws Exception {
+
+    morphline = createMorphline("test-morphlines/solrCellDocumentTypes");
+    String path = RESOURCES_DIR + "/test-documents";
+    String[] files = new String[] {
+        path + "/testBMPfp.txt",
+        path + "/boilerplate.html",
+        path + "/NullHeader.docx",
+        path + "/testWORD_various.doc",
+        path + "/testPDF.pdf",
+        path + "/testJPEG_EXIF.jpg",
+        path + "/testJPEG_EXIF.jpg.gz",
+        path + "/testJPEG_EXIF.jpg.tar.gz",
+        path + "/testXML.xml",
+//        path + "/cars.csv",
+//        path + "/cars.tsv",
+//        path + "/cars.ssv",
+//        path + "/cars.csv.gz",
+//        path + "/cars.tar.gz",
+        path + "/sample-statuses-20120906-141433.avro",
+        path + "/sample-statuses-20120906-141433",
+        path + "/sample-statuses-20120906-141433.gz",
+        path + "/sample-statuses-20120906-141433.bz2",
+    };
+    testDocumentTypesInternal(files, expectedRecords);
+  }
+
+  /**
+   * Second batch for the solrCellDocumentTypes morphline: presentation, spreadsheet, mail,
+   * audio, video and image fixtures. The commented-out entries are fixtures that are
+   * currently not exercised.
+   */
+  @Test
+  public void testSolrCellDocumentTypes2() throws Exception {
+    morphline = createMorphline("test-morphlines/solrCellDocumentTypes");
+    String path = RESOURCES_DIR + "/test-documents";
+    String[] files = new String[] {
+        path + "/testPPT_various.ppt",
+        path + "/testPPT_various.pptx",
+        path + "/testEXCEL.xlsx",
+        path + "/testEXCEL.xls",
+        path + "/testPages.pages",
+        //path + "/testNumbers.numbers",
+        //path + "/testKeynote.key",
+
+        path + "/testRTFVarious.rtf",
+        path + "/complex.mbox",
+        path + "/test-outlook.msg",
+        path + "/testEMLX.emlx",
+//        path + "/testRFC822",
+        path + "/rsstest.rss",
+//        path + "/testDITA.dita",
+
+        path + "/testMP3i18n.mp3",
+        path + "/testAIFF.aif",
+        path + "/testFLAC.flac",
+//        path + "/testFLAC.oga",
+//        path + "/testVORBIS.ogg",
+        path + "/testMP4.m4a",
+        path + "/testWAV.wav",
+//        path + "/testWMA.wma",
+
+        path + "/testFLV.flv",
+//        path + "/testWMV.wmv",
+
+        path + "/testBMP.bmp",
+        path + "/testPNG.png",
+        path + "/testPSD.psd",
+        path + "/testSVG.svg",
+        path + "/testTIFF.tif",
+
+//        path + "/test-documents.7z",
+//        path + "/test-documents.cpio",
+//        path + "/test-documents.tar",
+//        path + "/test-documents.tbz2",
+//        path + "/test-documents.tgz",
+//        path + "/test-documents.zip",
+//        path + "/test-zip-of-zip.zip",
+//        path + "/testJAR.jar",
+
+//        path + "/testKML.kml",
+//        path + "/testRDF.rdf",
+        path + "/testVISIO.vsd",
+//        path + "/testWAR.war",
+//        path + "/testWindows-x86-32.exe",
+//        path + "/testWINMAIL.dat",
+//        path + "/testWMF.wmf",
+    };
+    testDocumentTypesInternal(files, expectedRecords);
+  }
+
+  /**
+   * Test that the ContentHandler properly strips the illegal characters
+   */
+  @Test
+  public void testTransformValue() {
+    String fieldName = "user_name";
+    // sanity check: the fixture really differs from the clean string before stripping
+    assertFalse("foobar".equals(getFoobarWithNonChars()));
+
+    Metadata metadata = new Metadata();
+    // load illegal char string into a metadata field and generate a new document,
+    // which will cause the ContentHandler to be invoked.
+    metadata.set(fieldName, getFoobarWithNonChars());
+    StripNonCharSolrContentHandlerFactory contentHandlerFactory =
+        new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
+    IndexSchema schema = h.getCore().getLatestSchema();
+    SolrContentHandler contentHandler =
+        contentHandlerFactory.createSolrContentHandler(
+            metadata, new MapSolrParams(new HashMap<String, String>()), schema);
+    SolrInputDocument doc = contentHandler.newDocument();
+    String foobar = doc.getFieldValue(fieldName).toString();
+    // assertEquals (rather than assertTrue(equals)) reports the actual value on failure
+    assertEquals("foobar", foobar);
+  }
+
+  /**
+   * Returns string "foobar" with illegal characters interspersed.
+   */
+  private String getFoobarWithNonChars() {
+    char illegalChar = '\uffff';
+    StringBuilder builder = new StringBuilder();
+    builder.append(illegalChar).append(illegalChar).append("foo").append(illegalChar)
+        .append(illegalChar).append("bar").append(illegalChar).append(illegalChar);
+    return builder.toString();
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/build.xml b/solr/contrib/solr-morphlines-core/build.xml
new file mode 100644
index 00000000000..ad11be1226c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/build.xml
@@ -0,0 +1,107 @@
+
+
+
+
+
+
+
+ Solr Morphlines commands.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/ivy.xml b/solr/contrib/solr-morphlines-core/ivy.xml
new file mode 100644
index 00000000000..290460d27cb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/ivy.xml
@@ -0,0 +1,122 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
new file mode 100644
index 00000000000..f3030247065
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.SolrPingResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * A vehicle to load a list of Solr documents into some kind of destination,
+ * such as a SolrServer or MapReduce RecordWriter.
+ */
+public interface DocumentLoader {
+
+  /** Opens a new transaction. */
+  void beginTransaction() throws IOException, SolrServerException;
+
+  /**
+   * Hands a single document over to the destination.
+   *
+   * @param doc the document to load
+   */
+  void load(SolrInputDocument doc) throws IOException, SolrServerException;
+
+  /**
+   * Flushes every outstanding document to the destination and blocks until the outcome is
+   * known: a normal return is the positive ack, an exception is the negative ack. The
+   * caller is expected to commit or roll back the current flume transaction accordingly.
+   *
+   * @throws IOException
+   *           If there is a low-level I/O error.
+   */
+  void commitTransaction() throws IOException, SolrServerException;
+
+  /**
+   * Rolls back all pending documents that have not been committed yet.
+   *
+   * This is best-effort only and not a rollback in the database sense: content added
+   * earlier may already have been committed, e.g. because of autoCommit, a full buffer,
+   * or another client issuing a commit.
+   *
+   * @return the response of the rollback request
+   * @throws IOException
+   *           If there is a low-level I/O error.
+   */
+  UpdateResponse rollbackTransaction() throws IOException, SolrServerException;
+
+  /** Frees the resources held by this loader. */
+  void shutdown() throws IOException, SolrServerException;
+
+  /**
+   * Sends a ping request to find out whether the server is alive.
+   *
+   * @return the response of the ping request
+   * @throws IOException
+   *           If there is a low-level I/O error.
+   */
+  SolrPingResponse ping() throws IOException, SolrServerException;
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java
new file mode 100644
index 00000000000..251d016634c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.security.SecureRandom;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Random;
+
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.CommandBuilder;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.MorphlineRuntimeException;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.AbstractCommand;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.typesafe.config.Config;
+
+/**
+ * A command that assigns a record unique key that is the concatenation of the given
+ * {@code baseIdField} record field, followed by a running count of the record number within
+ * the current session. The count is reset to zero whenever a "startSession" notification is
+ * received.
+ * <p>
+ * For example, assume a CSV file containing multiple records but no unique ids, and the
+ * {@code baseIdField} field is the filesystem path of the file. Now this command can be used
+ * to assign the following record values to Solr's unique key field:
+ * {@code $path#0, $path#1, ... $path#N}.
+ * <p>
+ * The name of the unique key field is fetched from Solr's schema.xml file, as directed by the
+ * {@code solrLocator} configuration parameter.
+ */
+public final class GenerateSolrSequenceKeyBuilder implements CommandBuilder {
+
+  /** Returns the command names under which this builder is registered. */
+  @Override
+  public Collection<String> getNames() {
+    return Arrays.asList(
+        "generateSolrSequenceKey",
+        "sanitizeUniqueSolrKey" // old name (retained for backwards compatibility)
+      );
+  }
+
+  /** Creates the command for the given configuration and position in the command chain. */
+  @Override
+  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
+    return new GenerateSolrSequenceKey(config, parent, child, context);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  private static final class GenerateSolrSequenceKey extends AbstractCommand {
+
+    private final boolean preserveExisting;
+    private final String baseIdFieldName;
+    private final String uniqueKeyName; // null if the Solr schema declares no unique key field
+    private long recordCounter = 0;
+
+    private final String idPrefix; // for load testing only; enables adding same document many times with a different unique key
+    private final Random randomIdPrefix; // for load testing only; enables adding same document many times with a different unique key
+
+    /**
+     * Reads the "baseIdField", "preserveExisting", "solrLocator" and "idPrefix" settings.
+     * The unique key field name is looked up in the schema obtained via the solrLocator.
+     * An "idPrefix" of "random" selects a random numeric prefix instead of a fixed one.
+     */
+    public GenerateSolrSequenceKey(Config config, Command parent, Command child, MorphlineContext context) {
+      super(config, parent, child, context);
+      this.baseIdFieldName = getConfigs().getString(config, "baseIdField", Fields.BASE_ID);
+      this.preserveExisting = getConfigs().getBoolean(config, "preserveExisting", true);
+
+      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
+      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
+      LOG.debug("solrLocator: {}", locator);
+      IndexSchema schema = locator.getIndexSchema();
+      SchemaField uniqueKey = schema.getUniqueKeyField();
+      uniqueKeyName = uniqueKey == null ? null : uniqueKey.getName();
+
+      String tmpIdPrefix = getConfigs().getString(config, "idPrefix", null); // for load testing only
+      Random tmpRandomIdPrefix = null;
+      if ("random".equals(tmpIdPrefix)) { // for load testing only
+        tmpRandomIdPrefix = new Random(new SecureRandom().nextLong());
+        tmpIdPrefix = null;
+      }
+      idPrefix = tmpIdPrefix;
+      randomIdPrefix = tmpRandomIdPrefix;
+      validateArguments();
+    }
+
+    /**
+     * Assigns {@code <baseId>#<n>} to the unique key field, where n is the zero-based count
+     * of records seen since the last session start, then passes the record on.
+     * The record is left untouched if the schema has no unique key field; an existing key
+     * is kept if preserveExisting is set. The counter advances for every record either way.
+     *
+     * @throws MorphlineRuntimeException if a key must be generated but the base id field is absent
+     */
+    @Override
+    protected boolean doProcess(Record doc) {
+      long num = recordCounter++;
+      // LOG.debug("record #{} id before sanitizing doc: {}", num, doc);
+      if (uniqueKeyName != null) {
+        boolean keepExistingId = preserveExisting && doc.getFields().containsKey(uniqueKeyName);
+        if (!keepExistingId) {
+          Object baseId = doc.getFirstValue(baseIdFieldName);
+          if (baseId == null) {
+            throw new MorphlineRuntimeException("Record field " + baseIdFieldName
+                + " must not be null as it is needed as a basis for a unique key for solr doc: " + doc);
+          }
+          doc.replaceValues(uniqueKeyName, baseId.toString() + "#" + num);
+        }
+
+        // for load testing only; enables adding same document many times with a different unique key.
+        // Done only when a unique key field exists: without one there is no id to prefix and
+        // dereferencing the missing value would throw a NullPointerException.
+        if (idPrefix != null) {
+          String id = doc.getFirstValue(uniqueKeyName).toString();
+          doc.replaceValues(uniqueKeyName, idPrefix + id);
+        } else if (randomIdPrefix != null) {
+          String id = doc.getFirstValue(uniqueKeyName).toString();
+          // nextInt(bound) is never negative, unlike Math.abs(nextInt()) for Integer.MIN_VALUE
+          String prefix = String.valueOf(randomIdPrefix.nextInt(Integer.MAX_VALUE));
+          doc.replaceValues(uniqueKeyName, prefix + "#" + id);
+        }
+      }
+
+      LOG.debug("record #{} unique key sanitized to this: {}", num, doc);
+
+      return super.doProcess(doc);
+    }
+
+    /** Resets the record counter on a START_SESSION lifecycle event, then forwards the notification. */
+    @Override
+    protected void doNotify(Record notification) {
+      if (Notifications.containsLifecycleEvent(notification, Notifications.LifecycleEvent.START_SESSION)) {
+        recordCounter = 0; // reset
+      }
+      super.doNotify(notification);
+    }
+
+  }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java
new file mode 100644
index 00000000000..019dfcf0f52
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.common.SolrInputDocument;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.CommandBuilder;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.MorphlineRuntimeException;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.AbstractCommand;
+import com.cloudera.cdk.morphline.base.Metrics;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.codahale.metrics.Timer;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
+/**
+ * A command that loads a record into a SolrServer or MapReduce SolrOutputFormat.
+ */
+public final class LoadSolrBuilder implements CommandBuilder {
+
+  /** Returns the single command name under which this builder is registered. */
+  @Override
+  public Collection<String> getNames() {
+    return Collections.singletonList("loadSolr");
+  }
+
+  /** Creates the command for the given configuration and position in the command chain. */
+  @Override
+  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
+    return new LoadSolr(config, parent, child, context);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  private static final class LoadSolr extends AbstractCommand {
+
+    private final DocumentLoader loader;
+    private final Map<String, Float> boosts = new HashMap<String, Float>(); // field name -> boost
+    private final Timer elapsedTime;
+
+    /**
+     * Obtains the document loader via the "solrLocator" setting and parses the optional
+     * "boosts" section, whose keys are field names and whose values are float boosts.
+     */
+    public LoadSolr(Config config, Command parent, Command child, MorphlineContext context) {
+      super(config, parent, child, context);
+      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
+      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
+      LOG.debug("solrLocator: {}", locator);
+      this.loader = locator.getLoader();
+      Config boostsConfig = getConfigs().getConfig(config, "boosts", ConfigFactory.empty());
+      for (Map.Entry<String, Object> entry : boostsConfig.root().unwrapped().entrySet()) {
+        String fieldName = entry.getKey();
+        float boost = Float.parseFloat(entry.getValue().toString().trim());
+        boosts.put(fieldName, boost);
+      }
+      validateArguments();
+      this.elapsedTime = getTimer(Metrics.ELAPSED_TIME);
+    }
+
+    /**
+     * Maps transaction and shutdown lifecycle events onto the corresponding loader calls,
+     * then forwards the notification. Other events are ignored here.
+     *
+     * @throws MorphlineRuntimeException wrapping any IOException or SolrServerException
+     *         raised by the loader
+     */
+    @Override
+    protected void doNotify(Record notification) {
+      for (Object event : Notifications.getLifecycleEvents(notification)) {
+        try {
+          if (event == Notifications.LifecycleEvent.BEGIN_TRANSACTION) {
+            loader.beginTransaction();
+          } else if (event == Notifications.LifecycleEvent.COMMIT_TRANSACTION) {
+            loader.commitTransaction();
+          } else if (event == Notifications.LifecycleEvent.ROLLBACK_TRANSACTION) {
+            loader.rollbackTransaction();
+          } else if (event == Notifications.LifecycleEvent.SHUTDOWN) {
+            loader.shutdown();
+          }
+        } catch (SolrServerException e) {
+          throw new MorphlineRuntimeException(e);
+        } catch (IOException e) {
+          throw new MorphlineRuntimeException(e);
+        }
+      }
+      super.doNotify(notification);
+    }
+
+    /**
+     * Converts the record to a Solr document, loads it, and passes the record on to the
+     * next command. Conversion and loading are both covered by the elapsed-time timer.
+     *
+     * @throws MorphlineRuntimeException wrapping any IOException or SolrServerException
+     *         raised by the loader
+     */
+    @Override
+    protected boolean doProcess(Record record) {
+      Timer.Context timerContext = elapsedTime.time();
+      try {
+        // conversion happens inside the try so the timer is stopped even if it throws
+        loader.load(convert(record));
+      } catch (IOException e) {
+        throw new MorphlineRuntimeException(e);
+      } catch (SolrServerException e) {
+        throw new MorphlineRuntimeException(e);
+      } finally {
+        timerContext.stop();
+      }
+
+      // pass record to next command in chain:
+      return super.doProcess(record);
+    }
+
+    /** Copies every record field, with all of its values, into a new Solr document, applying the configured boost. */
+    private SolrInputDocument convert(Record record) {
+      Map<String, Collection<Object>> map = record.getFields().asMap();
+      SolrInputDocument doc = new SolrInputDocument(new HashMap(2 * map.size()));
+      for (Map.Entry<String, Collection<Object>> entry : map.entrySet()) {
+        String key = entry.getKey();
+        doc.setField(key, entry.getValue(), getBoost(key));
+      }
+      return doc;
+    }
+
+    /** Returns the boost configured for the given field, or 1.0 if none is configured. */
+    private float getBoost(String key) {
+      Float boost = boosts.get(key);
+      return boost == null ? 1.0f : boost.floatValue();
+    }
+
+  }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java
new file mode 100644
index 00000000000..f98eeb25016
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import org.apache.http.client.HttpClient;
+import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link ConcurrentUpdateSolrServer} that remembers the most recent error passed to
+ * {@link #handleError(Throwable)} and rethrows it, wrapped in a {@link RuntimeException}, from
+ * {@link #blockUntilFinished()} so that the submitter of the requests gets to see it.
+ */
+final class SafeConcurrentUpdateSolrServer extends ConcurrentUpdateSolrServer {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SafeConcurrentUpdateSolrServer.class);
+
+  /** Guards every read and write of {@link #currentException}. */
+  private final Object myLock = new Object();
+
+  /** The last error handed to handleError, or null if there was none since the last clear. */
+  private Throwable currentException = null;
+
+  public SafeConcurrentUpdateSolrServer(String solrServerUrl, int queueSize, int threadCount) {
+    this(solrServerUrl, null, queueSize, threadCount);
+  }
+
+  public SafeConcurrentUpdateSolrServer(String solrServerUrl, HttpClient client, int queueSize, int threadCount) {
+    super(solrServerUrl, client, queueSize, threadCount);
+  }
+
+  /**
+   * Records the error so that a later {@link #blockUntilFinished()} can report it, then logs it.
+   * A newer error replaces an older one that has not been cleared yet.
+   */
+  @Override
+  public void handleError(Throwable ex) {
+    assert ex != null;
+    synchronized (myLock) {
+      currentException = ex;
+    }
+    LOGGER.error("handleError", ex);
+  }
+
+  /**
+   * Waits like the superclass does and afterwards throws a {@link RuntimeException} wrapping the
+   * recorded error, if there is one. The recorded error stays in place until
+   * {@link #clearException()} is called.
+   */
+  @Override
+  public void blockUntilFinished() {
+    super.blockUntilFinished();
+    final Throwable pending;
+    synchronized (myLock) {
+      pending = currentException;
+    }
+    if (pending != null) {
+      throw new RuntimeException(pending);
+    }
+  }
+
+  /** Forgets any recorded error. */
+  public void clearException() {
+    synchronized (myLock) {
+      currentException = null;
+    }
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java
new file mode 100644
index 00000000000..fbc8de21bda
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.solr.schema.IndexSchema;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.CommandBuilder;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.AbstractCommand;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.typesafe.config.Config;
+
+/**
+ * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them
+ * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with
+ * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields).
+ * <p>
+ * Recall that Solr throws an exception on any attempt to load a document that contains a field that
+ * isn't specified in schema.xml.
+ */
+public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder {
+
+  /** Returns the single command name under which this builder is registered. */
+  @Override
+  public Collection<String> getNames() {
+    return Collections.singletonList("sanitizeUnknownSolrFields");
+  }
+
+  /** Creates the command from its configuration. */
+  @Override
+  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
+    return new SanitizeUnknownSolrFields(config, parent, child, context);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  private static final class SanitizeUnknownSolrFields extends AbstractCommand {
+
+    private final IndexSchema schema;
+    /** Prefix for renamed fields, or null if unknown fields are to be deleted. */
+    private final String renameToPrefix;
+
+    /**
+     * Reads the "solrLocator" sub-configuration to obtain the schema, and the optional
+     * "renameToPrefix" parameter; a blank prefix is treated as absent.
+     */
+    public SanitizeUnknownSolrFields(Config config, Command parent, Command child, MorphlineContext context) {
+      super(config, parent, child, context);
+
+      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
+      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
+      LOG.debug("solrLocator: {}", locator);
+      this.schema = locator.getIndexSchema();
+      Preconditions.checkNotNull(schema);
+      LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values()));
+
+      String str = getConfigs().getString(config, "renameToPrefix", "").trim();
+      this.renameToPrefix = str.length() > 0 ? str : null;
+      validateArguments();
+    }
+
+    /**
+     * Removes every record field that the schema does not know, first copying its values to the
+     * prefixed field name if a prefix is configured, then passes the record on.
+     *
+     * @param record the record to sanitize in place
+     * @return the result of the superclass doProcess call for this record
+     */
+    @Override
+    protected boolean doProcess(Record record) {
+      // Iterate over a snapshot of the entries because the record is modified inside the loop.
+      Collection<Map.Entry<String, Collection<Object>>> entries =
+          new ArrayList<Map.Entry<String, Collection<Object>>>(record.getFields().asMap().entrySet());
+      for (Map.Entry<String, Collection<Object>> entry : entries) {
+        String key = entry.getKey();
+        if (schema.getFieldOrNull(key) == null) {
+          LOG.debug("Sanitizing unknown Solr field: {}", key);
+          Collection<Object> values = entry.getValue();
+          if (renameToPrefix != null) {
+            record.getFields().putAll(renameToPrefix + key, values);
+          }
+          values.clear(); // implicitly removes key from record
+        }
+      }
+      return super.doProcess(record);
+    }
+
+  }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
new file mode 100644
index 00000000000..2381a08b082
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.impl.CloudSolrServer;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.util.SystemIdResolver;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import com.cloudera.cdk.morphline.api.MorphlineCompilationException;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.MorphlineRuntimeException;
+import com.cloudera.cdk.morphline.base.Configs;
+import com.google.common.base.Preconditions;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigRenderOptions;
+import com.typesafe.config.ConfigUtil;
+
+/**
+ * Set of configuration parameters that identify the location and schema of a Solr server or
+ * SolrCloud; Based on this information this class can return the schema and a corresponding
+ * {@link DocumentLoader}.
+ */
+public class SolrLocator {
+
+ private Config config;
+ private MorphlineContext context;
+ private String collectionName;
+ private String zkHost;
+ private String solrUrl;
+ private String solrHomeDir;
+ private int batchSize = 1000;
+
+ private static final String SOLR_HOME_PROPERTY_NAME = "solr.solr.home";
+
+ private static final Logger LOG = LoggerFactory.getLogger(SolrLocator.class);
+
+ /**
+ * Creates a locator that is bound to the given context and has no configuration; the
+ * location parameters keep their initial values until they are set through the setters.
+ *
+ * @param context the morphline context; must not be null
+ */
+ protected SolrLocator(MorphlineContext context) {
+ Preconditions.checkNotNull(context);
+ this.context = context;
+ }
+
+ /**
+ * Creates a locator from a configuration. Reads the optional parameters "collection",
+ * "zkHost", "solrHomeDir", "solrUrl" and "batchSize" (the latter defaults to the initial
+ * value of the batchSize field).
+ *
+ * @param config the configuration to read the parameters from
+ * @param context the morphline context; must not be null
+ */
+ public SolrLocator(Config config, MorphlineContext context) {
+ this(context);
+ this.config = config;
+ Configs configs = new Configs();
+ collectionName = configs.getString(config, "collection", null);
+ zkHost = configs.getString(config, "zkHost", null);
+ solrHomeDir = configs.getString(config, "solrHomeDir", null);
+ solrUrl = configs.getString(config, "solrUrl", null);
+ batchSize = configs.getInt(config, "batchSize", batchSize);
+ LOG.trace("Constructed solrLocator: {}", this);
+ // NOTE(review): presumably rejects parameters that were not read above -- confirm against Configs
+ configs.validateArguments(config);
+ }
+
+  /**
+   * Returns a loader that sends documents to the Solr instance identified by this locator.
+   * <p>
+   * A loader supplied by a {@link SolrMorphlineContext} wins. Otherwise, if <code>zkHost</code>
+   * is set, a connected {@link CloudSolrServer} for <code>collection</code> is used; else a
+   * {@link SafeConcurrentUpdateSolrServer} for <code>solrUrl</code> is used.
+   * <p>
+   * If setting up the loader fails after a server object has been created, that server is shut
+   * down before the exception is propagated, so that its resources are not leaked.
+   *
+   * @return the document loader
+   * @throws MorphlineCompilationException if 'zkHost' is given without 'collection', or if
+   *         neither 'zkHost' nor 'solrUrl' is given
+   * @throws MorphlineRuntimeException if the ZooKeeper host string leads to a malformed URL
+   */
+  public DocumentLoader getLoader() {
+    if (context instanceof SolrMorphlineContext) {
+      DocumentLoader loader = ((SolrMorphlineContext)context).getDocumentLoader();
+      if (loader != null) {
+        return loader;
+      }
+    }
+
+    if (zkHost != null && zkHost.length() > 0) {
+      if (collectionName == null || collectionName.length() == 0) {
+        throw new MorphlineCompilationException("Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
+      }
+      try {
+        CloudSolrServer cloudSolrServer = new CloudSolrServer(zkHost);
+        try {
+          cloudSolrServer.setDefaultCollection(collectionName);
+          cloudSolrServer.connect();
+          return new SolrServerDocumentLoader(cloudSolrServer, batchSize);
+        } catch (RuntimeException e) {
+          cloudSolrServer.shutdown(); // nobody else holds a reference that could release it
+          throw e;
+        }
+      } catch (MalformedURLException e) {
+        throw new MorphlineRuntimeException(e);
+      }
+    } else {
+      if (solrUrl == null || solrUrl.length() == 0) {
+        throw new MorphlineCompilationException("Missing parameter 'solrUrl'", config);
+      }
+      int solrServerNumThreads = 2;
+      int solrServerQueueLength = solrServerNumThreads;
+      // the binary response parser is used by default
+      SolrServer server = new SafeConcurrentUpdateSolrServer(solrUrl, solrServerQueueLength, solrServerNumThreads);
+      try {
+        return new SolrServerDocumentLoader(server, batchSize);
+      } catch (RuntimeException e) {
+        server.shutdown(); // e.g. an invalid batchSize; do not leak the server
+        throw e;
+      }
+    }
+  }
+
+  /**
+   * Returns the Solr index schema for this locator.
+   * <p>
+   * A schema supplied by a {@link SolrMorphlineContext} wins. Otherwise <code>schema.xml</code>
+   * is parsed with the <code>solr.solr.home</code> system property pointing at
+   * <code>solrHomeDir</code> or, if no <code>solrHomeDir</code> is configured, at a config
+   * directory downloaded from ZooKeeper for <code>collection</code>. The previous state of the
+   * system property is restored before this method returns, whichever directory was used.
+   *
+   * @return a schema that has a required unique key field
+   * @throws MorphlineCompilationException if the parameters do not identify a schema source, if
+   *         the download from ZooKeeper fails, or if the schema lacks a required unique key field
+   * @throws MorphlineRuntimeException if the schema cannot be read or parsed
+   */
+  public IndexSchema getIndexSchema() {
+    if (context instanceof SolrMorphlineContext) {
+      IndexSchema schema = ((SolrMorphlineContext)context).getIndexSchema();
+      if (schema != null) {
+        validateSchema(schema);
+        return schema;
+      }
+    }
+
+    // If solrHomeDir isn't defined and zkHost and collectionName are defined
+    // then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
+    String mySolrHomeDir = solrHomeDir;
+    if (solrHomeDir == null || solrHomeDir.length() == 0) {
+      if (zkHost == null || zkHost.length() == 0) {
+        // TODO: implement download from solrUrl if specified
+        throw new MorphlineCompilationException(
+            "Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'",
+            config);
+      }
+      if (collectionName == null || collectionName.length() == 0) {
+        throw new MorphlineCompilationException(
+            "Parameter 'zkHost' requires that you also pass parameter 'collection'", config);
+      }
+      ZooKeeperDownloader zki = new ZooKeeperDownloader();
+      SolrZkClient zkClient = zki.getZkClient(zkHost);
+      try {
+        String configName = zki.readConfigName(zkClient, collectionName);
+        File downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
+        mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath();
+      } catch (KeeperException e) {
+        throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
+      } catch (InterruptedException e) {
+        throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
+      } catch (IOException e) {
+        throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e);
+      } finally {
+        zkClient.close();
+      }
+    }
+
+    // setProperty returns the previous value, or null if the property was not set before
+    final String oldSolrHomeDir = System.setProperty(SOLR_HOME_PROPERTY_NAME, mySolrHomeDir);
+    try {
+      SolrConfig solrConfig = new SolrConfig(); // TODO use SolrResourceLoader ala TikaMapper?
+      SolrResourceLoader loader = solrConfig.getResourceLoader();
+
+      InputSource is = new InputSource(loader.openSchema("schema.xml"));
+      is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
+
+      IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
+      validateSchema(schema);
+      return schema;
+    } catch (ParserConfigurationException e) {
+      throw new MorphlineRuntimeException(e);
+    } catch (IOException e) {
+      throw new MorphlineRuntimeException(e);
+    } catch (SAXException e) {
+      throw new MorphlineRuntimeException(e);
+    } finally {
+      // Restore the old global state. The property was overwritten above regardless of where the
+      // directory came from, so it has to be restored regardless, too.
+      if (oldSolrHomeDir == null) {
+        System.clearProperty(SOLR_HOME_PROPERTY_NAME);
+      } else {
+        System.setProperty(SOLR_HOME_PROPERTY_NAME, oldSolrHomeDir);
+      }
+    }
+  }
+
+ private void validateSchema(IndexSchema schema) {
+ if (schema.getUniqueKeyField() == null) {
+ throw new MorphlineCompilationException("Solr schema.xml is missing unique key field", config);
+ }
+ if (!schema.getUniqueKeyField().isRequired()) {
+ throw new MorphlineCompilationException("Solr schema.xml must contain a required unique key field", config);
+ }
+ }
+
+  /** Renders the parameters of this locator in concise form. */
+  @Override
+  public String toString() {
+    Config rendered = toConfig(null);
+    return rendered.root().render(ConfigRenderOptions.concise());
+  }
+
+  /**
+   * Builds a configuration object from the current parameter values.
+   *
+   * @param key if not null, the parameters are nested under this key; if null, they form the
+   *        root of the returned configuration
+   * @return the parsed configuration
+   */
+  public Config toConfig(String key) {
+    StringBuilder json = new StringBuilder();
+    if (key != null) {
+      json.append(toJson(key)).append(" : ");
+    }
+    json.append("{");
+    json.append(" collection : ").append(toJson(collectionName)).append(", ");
+    json.append(" zkHost : ").append(toJson(zkHost)).append(", ");
+    json.append(" solrUrl : ").append(toJson(solrUrl)).append(", ");
+    json.append(" solrHomeDir : ").append(toJson(solrHomeDir)).append(", ");
+    json.append(" batchSize : ").append(toJson(batchSize)).append(" ");
+    json.append("}");
+    return ConfigFactory.parseString(json.toString());
+  }
+
+  /** Returns the quoted string form of the given value; null is rendered as the empty string. */
+  private String toJson(Object key) {
+    String text = (key == null) ? "" : key.toString();
+    return ConfigUtil.quoteString(text);
+  }
+
+ /** Returns the collection name, or null if none has been set. */
+ public String getCollectionName() {
+ return this.collectionName;
+ }
+
+ /** Sets the collection name. */
+ public void setCollectionName(String collectionName) {
+ this.collectionName = collectionName;
+ }
+
+ /** Returns the ZooKeeper host string, or null if none has been set. */
+ public String getZkHost() {
+ return this.zkHost;
+ }
+
+ /** Sets the ZooKeeper host string. */
+ public void setZkHost(String zkHost) {
+ this.zkHost = zkHost;
+ }
+
+ /** Returns the Solr home directory, or null if none has been set. */
+ public String getSolrHomeDir() {
+ return this.solrHomeDir;
+ }
+
+ /** Sets the Solr home directory. */
+ public void setSolrHomeDir(String solrHomeDir) {
+ this.solrHomeDir = solrHomeDir;
+ }
+
+ /** Returns the value of the solrUrl parameter, or null if none has been set. */
+ public String getServerUrl() {
+ return this.solrUrl;
+ }
+
+ /** Sets the value of the solrUrl parameter. */
+ public void setServerUrl(String solrUrl) {
+ this.solrUrl = solrUrl;
+ }
+
+ /** Returns the batch size that is passed to the document loader. */
+ public int getBatchSize() {
+ return this.batchSize;
+ }
+
+ /** Sets the batch size that is passed to the document loader. */
+ public void setBatchSize(int batchSize) {
+ this.batchSize = batchSize;
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java
new file mode 100644
index 00000000000..56d6e39227c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import org.apache.solr.schema.IndexSchema;
+
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+
+/**
+ * A context that is specific to Solr.
+ */
+public class SolrMorphlineContext extends MorphlineContext {
+
+ private DocumentLoader loader;
+ private IndexSchema schema;
+
+ /** For public access use {@link Builder#build()} instead */
+ protected SolrMorphlineContext() {}
+
+ public DocumentLoader getDocumentLoader() {
+ return loader;
+ }
+
+ public IndexSchema getIndexSchema() {
+ return schema;
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ // Nested classes:
+ ///////////////////////////////////////////////////////////////////////////////
+ /**
+ * Helper to construct a {@link SolrMorphlineContext} instance.
+ */
+ public static class Builder extends MorphlineContext.Builder {
+
+ private DocumentLoader loader;
+ private IndexSchema schema;
+
+ public Builder() {}
+
+ public Builder setDocumentLoader(DocumentLoader loader) {
+ this.loader = loader;
+ return this;
+ }
+
+ public Builder setIndexSchema(IndexSchema schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ @Override
+ public SolrMorphlineContext build() {
+ ((SolrMorphlineContext)context).loader = loader;
+ ((SolrMorphlineContext)context).schema = schema;
+ return (SolrMorphlineContext) super.build();
+ }
+
+ @Override
+ protected SolrMorphlineContext create() {
+ return new SolrMorphlineContext();
+ }
+
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java
new file mode 100644
index 00000000000..d343230fcba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrServer;
+import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
+import org.apache.solr.client.solrj.response.SolrPingResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A vehicle to load a list of Solr documents into a local or remote {@link SolrServer}.
+ * <p>
+ * Documents are collected in a batch and sent to the server whenever the batch has reached
+ * <code>batchSize</code> documents, and once more when the transaction is committed.
+ */
+public class SolrServerDocumentLoader implements DocumentLoader {
+
+  private final SolrServer server; // proxy to local or remote solr server
+  private long numLoadedDocs = 0; // number of documents loaded in the current transaction
+  private final int batchSize;
+  private final List<SolrInputDocument> batch = new ArrayList<SolrInputDocument>();
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SolrServerDocumentLoader.class);
+
+  /**
+   * @param server the server to send documents to; must not be null
+   * @param batchSize the number of documents to collect before sending; must be positive
+   * @throws IllegalArgumentException if either argument is invalid
+   */
+  public SolrServerDocumentLoader(SolrServer server, int batchSize) {
+    if (server == null) {
+      throw new IllegalArgumentException("solr server must not be null");
+    }
+    this.server = server;
+    if (batchSize <= 0) {
+      throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize);
+    }
+    this.batchSize = batchSize;
+  }
+
+  /**
+   * Discards any pending documents, resets the document counter and, for a
+   * {@link SafeConcurrentUpdateSolrServer}, clears the error it may have recorded.
+   */
+  @Override
+  public void beginTransaction() {
+    LOGGER.trace("beginTransaction");
+    batch.clear();
+    numLoadedDocs = 0;
+    if (server instanceof SafeConcurrentUpdateSolrServer) {
+      ((SafeConcurrentUpdateSolrServer) server).clearException();
+    }
+  }
+
+  /** Adds the document to the batch and sends the batch once it is full. */
+  @Override
+  public void load(SolrInputDocument doc) throws IOException, SolrServerException {
+    LOGGER.trace("load doc: {}", doc);
+    batch.add(doc);
+    if (batch.size() >= batchSize) {
+      loadBatch();
+    }
+  }
+
+  /**
+   * Sends the documents that are still pending and, if any document was sent in this
+   * transaction through a {@link ConcurrentUpdateSolrServer}, waits until that server is done.
+   */
+  @Override
+  public void commitTransaction() throws SolrServerException, IOException {
+    LOGGER.trace("commitTransaction");
+    if (batch.size() > 0) {
+      loadBatch();
+    }
+    if (numLoadedDocs > 0) {
+      if (server instanceof ConcurrentUpdateSolrServer) {
+        ((ConcurrentUpdateSolrServer) server).blockUntilFinished();
+      }
+    }
+  }
+
+  /** Sends the current batch to the server; the batch is emptied even if sending fails. */
+  private void loadBatch() throws SolrServerException, IOException {
+    // Counted before sending, so that a failed attempt still counts as loading in commitTransaction.
+    numLoadedDocs += batch.size();
+    try {
+      server.add(batch);
+    } finally {
+      batch.clear();
+    }
+  }
+
+  /**
+   * Asks the server to roll back, except for a {@link CloudSolrServer}, for which no request
+   * is sent and an empty response is returned.
+   */
+  @Override
+  public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
+    LOGGER.trace("rollback");
+    if (!(server instanceof CloudSolrServer)) {
+      return server.rollback();
+    } else {
+      return new UpdateResponse();
+    }
+  }
+
+  /** Shuts down the underlying server. */
+  @Override
+  public void shutdown() {
+    LOGGER.trace("shutdown");
+    server.shutdown();
+  }
+
+  /** Pings the underlying server and returns its response. */
+  @Override
+  public SolrPingResponse ping() throws SolrServerException, IOException {
+    LOGGER.trace("ping");
+    return server.ping();
+  }
+
+  /** Returns the server that documents are sent to. */
+  public SolrServer getSolrServer() {
+    return server;
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java
new file mode 100644
index 00000000000..58c1bb5536c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.IndexSchema;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.CommandBuilder;
+import com.cloudera.cdk.morphline.api.MorphlineCompilationException;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.MorphlineRuntimeException;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.AbstractCommand;
+import com.google.common.base.Preconditions;
+import com.typesafe.config.Config;
+
+/**
+ * A command that uses the embedded Solr/Lucene Analyzer library to generate tokens from a text
+ * string, without sending data to a Solr server.
+ */
+public final class TokenizeTextBuilder implements CommandBuilder {
+
+  /** Returns the single command name under which this builder is registered. */
+  @Override
+  public Collection<String> getNames() {
+    return Collections.singletonList("tokenizeText");
+  }
+
+  /** Creates the command from its configuration. */
+  @Override
+  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
+    return new TokenizeText(config, parent, child, context);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  private static final class TokenizeText extends AbstractCommand {
+
+    private final String inputFieldName;
+    private final String outputFieldName;
+    private final Analyzer analyzer;
+    private final CharTermAttribute token; // cached
+    private final ReusableStringReader reader = new ReusableStringReader(); // cached
+
+    /**
+     * Reads the parameters "inputField", "outputField", "solrFieldType" and "solrLocator", and
+     * looks up the analyzer of the named field type in the schema found via the locator.
+     *
+     * @throws MorphlineCompilationException if the schema has no field type of the given name or
+     *         if the token stream cannot be created
+     */
+    public TokenizeText(Config config, Command parent, Command child, MorphlineContext context) {
+      super(config, parent, child, context);
+      this.inputFieldName = getConfigs().getString(config, "inputField");
+      this.outputFieldName = getConfigs().getString(config, "outputField");
+      String solrFieldType = getConfigs().getString(config, "solrFieldType");
+      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
+      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
+      LOG.debug("solrLocator: {}", locator);
+      IndexSchema schema = locator.getIndexSchema();
+      FieldType fieldType = schema.getFieldTypeByName(solrFieldType);
+      if (fieldType == null) {
+        throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config);
+      }
+      this.analyzer = fieldType.getAnalyzer();
+      Preconditions.checkNotNull(analyzer);
+      try { // register CharTermAttribute for later (implicit) reuse
+        this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
+      } catch (IOException e) {
+        throw new MorphlineCompilationException("Cannot create token stream", config, e);
+      }
+      Preconditions.checkNotNull(token);
+      validateArguments();
+    }
+
+    /**
+     * Runs the analyzer over every value of the input field and appends each non-empty token to
+     * the output field, then passes the record on.
+     *
+     * @param record the record to read the input values from and add the tokens to
+     * @return the result of the superclass doProcess call for this record
+     * @throws MorphlineRuntimeException if the analyzer fails with an IOException
+     */
+    @Override
+    protected boolean doProcess(Record record) {
+      try {
+        List outputValues = record.get(outputFieldName);
+        for (Object value : record.get(inputFieldName)) {
+          reader.setValue(value.toString());
+          TokenStream tokenStream = analyzer.tokenStream("content", reader);
+          try {
+            tokenStream.reset();
+            while (tokenStream.incrementToken()) {
+              if (token.length() > 0) { // incrementToken() updates the token!
+                String tokenStr = new String(token.buffer(), 0, token.length());
+                outputValues.add(tokenStr);
+              }
+            }
+            tokenStream.end();
+          } finally {
+            // Close even on failure: the stream is obtained from the analyzer again for the
+            // next value and the next record, and must have been closed before that.
+            tokenStream.close();
+          }
+        }
+      } catch (IOException e) {
+        throw new MorphlineRuntimeException(e);
+      }
+
+      // pass record to next command in chain:
+      return super.doProcess(record);
+    }
+
+  }
+
+
+  // Copied from org.apache.lucene.document.Field.java from lucene-4.3.0
+  /*
+   * Licensed to the Apache Software Foundation (ASF) under one or more
+   * contributor license agreements. See the NOTICE file distributed with
+   * this work for additional information regarding copyright ownership.
+   * The ASF licenses this file to You under the Apache License, Version 2.0
+   * (the "License"); you may not use this file except in compliance with
+   * the License. You may obtain a copy of the License at
+   *
+   * http://www.apache.org/licenses/LICENSE-2.0
+   *
+   * Unless required by applicable law or agreed to in writing, software
+   * distributed under the License is distributed on an "AS IS" BASIS,
+   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   * See the License for the specific language governing permissions and
+   * limitations under the License.
+   */
+  /** A reader over a string that can be pointed at a new string via {@link #setValue(String)}. */
+  private static final class ReusableStringReader extends Reader {
+    private int pos = 0, size = 0;
+    private String s = null;
+
+    /** Makes this reader start over at the beginning of the given string. */
+    void setValue(String s) {
+      this.s = s;
+      this.size = s.length();
+      this.pos = 0;
+    }
+
+    /** Returns the next character, or -1 (dropping the string) once the end has been reached. */
+    @Override
+    public int read() {
+      if (pos < size) {
+        return s.charAt(pos++);
+      } else {
+        s = null;
+        return -1;
+      }
+    }
+
+    /** Copies up to len remaining characters into c at off; returns the count, or -1 at the end. */
+    @Override
+    public int read(char[] c, int off, int len) {
+      if (pos < size) {
+        len = Math.min(len, size-pos);
+        s.getChars(pos, pos+len, c, off);
+        pos += len;
+        return len;
+      } else {
+        s = null;
+        return -1;
+      }
+    }
+
+    @Override
+    public void close() {
+      pos = size; // this prevents NPE when reading after close!
+      s = null;
+    }
+  }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java
new file mode 100644
index 00000000000..68cb6270139
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.cloud.Aliases;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Files;
+
+/**
+ * Downloads SolrCloud information from ZooKeeper.
+ */
+final class ZooKeeperDownloader {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperDownloader.class);
+
+ public SolrZkClient getZkClient(String zkHost) {
+ if (zkHost == null) {
+ throw new IllegalArgumentException("zkHost must not be null");
+ }
+
+ SolrZkClient zkClient;
+ try {
+ zkClient = new SolrZkClient(zkHost, 30000);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e);
+ }
+ return zkClient;
+ }
+
+ /**
+ * Returns config value given collection name
+ * Borrowed heavily from Solr's ZKController.
+ */
+ public String readConfigName(SolrZkClient zkClient, String collection)
+ throws KeeperException, InterruptedException {
+ if (collection == null) {
+ throw new IllegalArgumentException("collection must not be null");
+ }
+ String configName = null;
+
+ // first check for alias
+ byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true);
+ Aliases aliases = ClusterState.load(aliasData);
+ String alias = aliases.getCollectionAlias(collection);
+ if (alias != null) {
+ List aliasList = StrUtils.splitSmart(alias, ",", true);
+ if (aliasList.size() > 1) {
+ throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections");
+ }
+ collection = aliasList.get(0);
+ }
+
+ String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
+ if (LOG.isInfoEnabled()) {
+ LOG.info("Load collection config from:" + path);
+ }
+ byte[] data = zkClient.getData(path, null, null, true);
+
+ if(data != null) {
+ ZkNodeProps props = ZkNodeProps.load(data);
+ configName = props.getStr(ZkController.CONFIGNAME_PROP);
+ }
+
+ if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) {
+ LOG.error("Specified config does not exist in ZooKeeper:" + configName);
+ throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:"
+ + configName);
+ }
+
+ return configName;
+ }
+
+ /**
+ * Download and return the config directory from ZK
+ */
+ public File downloadConfigDir(SolrZkClient zkClient, String configName)
+ throws IOException, InterruptedException, KeeperException {
+ File dir = Files.createTempDir();
+ dir.deleteOnExit();
+ ZkController.downloadConfigDir(zkClient, configName, dir);
+ File confDir = new File(dir, "conf");
+ if (!confDir.isDirectory()) {
+ // create a temporary directory with "conf" subdir and mv the config in there. This is
+ // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g.
+ // conf/solrconfig.xml which is necessary for proper solr operation. This should work
+ // even if solrctl changes.
+ confDir = new File(Files.createTempDir().getAbsolutePath(), "conf");
+ confDir.getParentFile().deleteOnExit();
+ Files.move(dir, confDir);
+ dir = confDir.getParentFile();
+ }
+ return dir;
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html
new file mode 100644
index 00000000000..ecec1bdf4d8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Morphlines Solr related code.
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/java/overview.html b/solr/contrib/solr-morphlines-core/src/java/overview.html
new file mode 100644
index 00000000000..7f8ad137a34
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/overview.html
@@ -0,0 +1,21 @@
+
+
+
+Apache Solr Search Server: Solr Core Morphline Commands
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/README b/solr/contrib/solr-morphlines-core/src/test-files/README
new file mode 100644
index 00000000000..10f878acccb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/README
@@ -0,0 +1,21 @@
+
+
+This directory is where any non-transient, non-java files needed
+for the execution of tests should live.
+
+It is used as the CWD when running JUnit tests.
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv b/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv
new file mode 100644
index 00000000000..817e8b769cf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv
@@ -0,0 +1,11 @@
+id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
+0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
+0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
+0553573429,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
+0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
+0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
+0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
+0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
+0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
+0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
+0805080499,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html
new file mode 100644
index 00000000000..5732f6214bc
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html
@@ -0,0 +1,49 @@
+
+
+ Welcome to Solr
+
+
+
+ Here is some text
+
+Here is some text in a div
+
+News
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt
new file mode 100644
index 00000000000..0c9928b9e26
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt
@@ -0,0 +1,3 @@
+Example text document
+
+This is a simple example for a plain text document, indexed to Solr
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README
new file mode 100644
index 00000000000..b7ca5b834f4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README
@@ -0,0 +1,18 @@
+
+
+Items under this directory are used by TestConfig.testLibs()
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties b/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties
new file mode 100644
index 00000000000..fb0577130bb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties
@@ -0,0 +1,12 @@
+# Logging level
+log4j.rootLogger=INFO, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.Target=System.err
+log4j.appender.CONSOLE.layout=org.apache.solr.util.SolrLogLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n
+
+log4j.logger.org.apache.zookeeper=WARN
+log4j.logger.org.apache.hadoop=WARN
+#log4j.logger.org.apache.solr=WARN
+log4j.logger.org.apache.solr.hadoop=INFO
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf b/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf
new file mode 100755
index 00000000000..33b819f0649
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf
@@ -0,0 +1,382 @@
+%PDF-1.3
+%ª«¬
+4 0 obj
+<< /Type /Info
+/Producer (FOP 0.20.5) >>
+endobj
+5 0 obj
+<< /Length 425 /Filter [ /ASCII85Decode /FlateDecode ]
+ >>
+stream
+Gb!$BYuAO_'ZTnF'lQbNnGsdiUK'C#3dAWc3lI>k\P#:a@Qja<+itJa;R]7&ni\$9pOi?T._;3m?jT+q7>,P^70oB=!nr]%k%\U^KVqaF4*Z`$VJ7Gs`T5OO`(tY]Q1`-5*m;!--h%?*_0SbIU\BV=OFg<#%YcH_YI$(sDCIJts'M2*drjRrJE!OM7HP!^-&EW>B\:RYFnaY.m[$s5f"XG0>^fduHe6/++D0fY3@AWR@HYabmQ5jDQ.c0>I.uQX&(lA@VLm_s_9XnBh7%"*/%^]AO3eTI!BTo'pF?%''A*PDU*NW%d`2@p'@:D@U??4PP08m[K4N,8,(e`N+\7n+a>ac%q#,D8DRQ*3l]MS>'gn3lWNGmRAtQ7n]eDnLPrD!?DEdB/hNarb_7$B7U-H7!['nXLkV_no5AHq`>6~>
+endstream
+endobj
+6 0 obj
+<< /Type /Page
+/Parent 1 0 R
+/MediaBox [ 0 0 612 792 ]
+/Resources 3 0 R
+/Contents 5 0 R
+/Annots 7 0 R
+>>
+endobj
+7 0 obj
+[
+8 0 R
+10 0 R
+12 0 R
+]
+endobj
+8 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 102.0 559.666 137.324 547.666 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A 9 0 R
+/H /I
+>>
+endobj
+10 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 102.0 541.466 164.648 529.466 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A 11 0 R
+/H /I
+>>
+endobj
+12 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 102.0 523.266 154.016 511.266 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A 13 0 R
+/H /I
+>>
+endobj
+14 0 obj
+<< /Length 2197 /Filter [ /ASCII85Decode /FlateDecode ]
+ >>
+stream
+Gb"/)9lo&I&A@C2n5a2!7YkueV^?,ABrC@*[F.^sK-J\u-^*\VZ9A3]?'#&sU^3,]d[;/F9HjMs^A"j:!rHNC?7rs!0)f1q`$?\lOaRt/g/f.>-Am[t'`RUrGL7Uk8K90.i-up;qeIYfjWZ2&ki:[3`TuXFj]`a&Hbo8r&P(RZ+M_>&eY.T4jXOI%UHbq1GnF>g$KgW%R24nBkc\[qA$(koU$isG(W7`PE,nMam;U4(ZC8,Ca!_P2VYf>\V0gK0g;-.E[Y(&s=+&g6ms""'Ip>0b/D!>a&PX9eo_tuueR:b=r@6Q5LM],XbK;&L$0WubNX9c"=FM$543G_>rAQ_%2/dW<)/"U1&]l:AZ&\Mif8sF`r5>b<$lqK"2t]maZ*oDb!^$Zn6OC'%XkI];&*rkLP1BMGI@$,0fK(=gC-3q7n7d4EQ4DepBc'^Q^A%e?19a(`S*FHTN*RNjP&P%2`6%jpOU\DBUN)cnMYa3PQ!sYETiGJi'q>>m*e;[,.1l\rZo3K;>$K"a1:s3pU>o+:'7fND!+6GV@2G;qf`\`=J#WkOjSke<1f>VfbcUtXM"1jGN:@Ptec8Mc-hmS5S>q/nAY%[4%7BCI![NA:We(41]ld_`pU80;+e`1DbG.RQ:'#GQJAL2![aIWY'A*Y_>mF7>2S0IWM%nLg3%%;7r5=;3!7]05r?Ft-6I]9n9C\fUUF6R\9bPEVSutd9LFTpaoaP7Iuus-S#S.3;sVu-*T/:&2Ld]&g0oHoo`TmR'b]ps6hq9s&f+6_5c(k"m96-f:YA!:)K:q+(Hl=t`:+"<lQm6B=K&/r/Ep6Y]EG.T/34(fT0=6_m5PA-7PVo:"r)W'.mX>1A8Yg9kfa?"Qp+ta7Hb$FM`*OP^>3Sg)P[?jIXd]i]"h)Tdjnm[6@=kmEBkP1/K[bg`"7U:BWk^=!+3\ANTnN75*Rh_<-UA*!&rr#KW/7EXkeJU9GF5RA,#kqJ5aC9Ra5,PsiI`uF23/B"nkPHe2Q;B@pBXGM-i;<'oOM,dc3'qL)Ne,OV2.*f^Bt;0P#roPn?h]@-63,-9lQSF!dic13Ag\_]m=7Llb\*&C+>\+o6)Y,C._?+X1Qok%j>f[#T!,CD2T4cL'.Nb_Vit&M]!j7j6LHB.g9AQre&be$gJhbAg68kDJf@XZ7'2791RD*qAP]u")(lEjX)\-#O$aK(E]jq*3XbL:3q:o&9gcZLl?:E-l'-dHf;;_hhH3m/Q3]9jJRn>Z8]1Gt6PAVJ[r2gsg=4$!6I$RQ@Y6;H(U>,LWdW>Z5iTYZ'tAcSfoN,U=/fIoA::l8X^fXIa4m3-]9$Zc\E0H^!pmfeMjW3#p1J)pbH^VZML"NZ$U,Yg;f[AVrZRhlRCC[)D*>K0IRWR98A=<>dPSd)@Ec)OXGjK01hM%!FhVR[I<5Va3V,I"YuQZb-,XEM!Gk_-r<9T0W#M!!;RX!]MtBdJ0ah'FCoNF1r"gmU>Rb4aE:Z'I)d-f_1:B0gfmnM?K9ljY>R%*Fc9oYiohHndi(!dK+]ElID:'g:PKq6fKKHdO>bmG-2]ZmVcqs+ef-EWR(1Da)F&CoL[['3)UZ^!fo+Ua2NSC7m5oIXlLoF)+cWUr/MaMP@shSN$gD*jB=:/ru]MF>3-m'j6_-'>(Uq'PN4Fl*XC8ABmg\b`kmI@<0Sh)bkNopK]E6S7,V*o!<)infW?).%mtC2S8!kqh$BpiWu=4)>.Wm+Mt.YPC"ZlO^Ge*Y5)8QlX2
+endstream
+endobj
+15 0 obj
+<< /Type /Page
+/Parent 1 0 R
+/MediaBox [ 0 0 612 792 ]
+/Resources 3 0 R
+/Contents 14 0 R
+/Annots 16 0 R
+>>
+endobj
+16 0 obj
+[
+17 0 R
+18 0 R
+19 0 R
+20 0 R
+21 0 R
+22 0 R
+23 0 R
+24 0 R
+25 0 R
+26 0 R
+27 0 R
+28 0 R
+29 0 R
+]
+endobj
+17 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 232.344 608.466 372.012 596.466 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-user@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+18 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 591.266 189.336 579.266 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-user-subscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+19 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 578.066 215.988 566.066 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-user-unsubscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+20 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 564.866 197.316 552.866 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-user/)
+/S /URI >>
+/H /I
+>>
+endobj
+21 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 453.924 564.866 475.26 552.866 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (http://wiki.apache.org/solr/SolrResources)
+/S /URI >>
+/H /I
+>>
+endobj
+22 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 259.668 441.722 396.672 429.722 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-dev@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+23 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 424.522 189.336 412.522 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-dev-subscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+24 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 411.322 215.988 399.322 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-dev-unsubscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+25 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 398.122 197.316 386.122 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-dev/)
+/S /URI >>
+/H /I
+>>
+endobj
+26 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 453.924 398.122 475.26 386.122 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (http://wiki.apache.org/solr/SolrResources)
+/S /URI >>
+/H /I
+>>
+endobj
+27 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 294.624 296.178 403.284 284.178 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (version_control.html)
+/S /URI >>
+/H /I
+>>
+endobj
+28 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 265.778 189.336 253.778 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-commits-subscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+29 0 obj
+<< /Type /Annot
+/Subtype /Link
+/Rect [ 108.0 252.578 215.988 240.578 ]
+/C [ 0 0 0 ]
+/Border [ 0 0 0 ]
+/A << /URI (mailto:solr-commits-unsubscribe@lucene.apache.org)
+/S /URI >>
+/H /I
+>>
+endobj
+31 0 obj
+<<
+ /Title (\376\377\0\61\0\40\0\125\0\163\0\145\0\162\0\163)
+ /Parent 30 0 R
+ /Next 32 0 R
+ /A 9 0 R
+>> endobj
+32 0 obj
+<<
+ /Title (\376\377\0\62\0\40\0\104\0\145\0\166\0\145\0\154\0\157\0\160\0\145\0\162\0\163)
+ /Parent 30 0 R
+ /Prev 31 0 R
+ /Next 33 0 R
+ /A 11 0 R
+>> endobj
+33 0 obj
+<<
+ /Title (\376\377\0\63\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\163)
+ /Parent 30 0 R
+ /Prev 32 0 R
+ /A 13 0 R
+>> endobj
+34 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F3
+/BaseFont /Helvetica-Bold
+/Encoding /WinAnsiEncoding >>
+endobj
+35 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F5
+/BaseFont /Times-Roman
+/Encoding /WinAnsiEncoding >>
+endobj
+36 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F1
+/BaseFont /Helvetica
+/Encoding /WinAnsiEncoding >>
+endobj
+37 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica-Oblique
+/Encoding /WinAnsiEncoding >>
+endobj
+38 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F7
+/BaseFont /Times-Bold
+/Encoding /WinAnsiEncoding >>
+endobj
+1 0 obj
+<< /Type /Pages
+/Count 2
+/Kids [6 0 R 15 0 R ] >>
+endobj
+2 0 obj
+<< /Type /Catalog
+/Pages 1 0 R
+ /Outlines 30 0 R
+ /PageMode /UseOutlines
+ >>
+endobj
+3 0 obj
+<<
+/Font << /F3 34 0 R /F5 35 0 R /F1 36 0 R /F2 37 0 R /F7 38 0 R >>
+/ProcSet [ /PDF /ImageC /Text ] >>
+endobj
+9 0 obj
+<<
+/S /GoTo
+/D [15 0 R /XYZ 85.0 659.0 null]
+>>
+endobj
+11 0 obj
+<<
+/S /GoTo
+/D [15 0 R /XYZ 85.0 492.256 null]
+>>
+endobj
+13 0 obj
+<<
+/S /GoTo
+/D [15 0 R /XYZ 85.0 325.512 null]
+>>
+endobj
+30 0 obj
+<<
+ /First 31 0 R
+ /Last 33 0 R
+>> endobj
+xref
+0 39
+0000000000 65535 f
+0000007198 00000 n
+0000007263 00000 n
+0000007355 00000 n
+0000000015 00000 n
+0000000071 00000 n
+0000000587 00000 n
+0000000707 00000 n
+0000000746 00000 n
+0000007478 00000 n
+0000000881 00000 n
+0000007541 00000 n
+0000001018 00000 n
+0000007607 00000 n
+0000001155 00000 n
+0000003445 00000 n
+0000003568 00000 n
+0000003679 00000 n
+0000003867 00000 n
+0000004063 00000 n
+0000004261 00000 n
+0000004471 00000 n
+0000004665 00000 n
+0000004852 00000 n
+0000005047 00000 n
+0000005244 00000 n
+0000005453 00000 n
+0000005647 00000 n
+0000005821 00000 n
+0000006020 00000 n
+0000007673 00000 n
+0000006221 00000 n
+0000006342 00000 n
+0000006508 00000 n
+0000006642 00000 n
+0000006755 00000 n
+0000006865 00000 n
+0000006973 00000 n
+0000007089 00000 n
+trailer
+<<
+/Size 39
+/Root 2 0 R
+/Info 4 0 R
+>>
+startxref
+7724
+%%EOF
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt
new file mode 100644
index 00000000000..6242cff237b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt
@@ -0,0 +1 @@
+This is around for back compat testing purposes and should be able to be removed in Solr 5.0
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml
new file mode 100644
index 00000000000..75da88a52f1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml
@@ -0,0 +1,53 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js
new file mode 100644
index 00000000000..1b3c9fc2d6e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js
@@ -0,0 +1,26 @@
+function processAdd(cmd) {
+ // Integer.valueOf is needed here to get a tru java object, because
+ // all javascript numbers are floating point (ie: java.lang.Double)
+ cmd.getSolrInputDocument().addField("script_added_i",
+ java.lang.Integer.valueOf(42));
+ cmd.getSolrInputDocument().addField("script_added_d", 42.3);
+
+}
+
+// // //
+
+function processDelete() {
+ // NOOP - intentionally empty; only processAdd does any work in this script
+}
+function processCommit() {
+ // NOOP - intentionally empty; only processAdd does any work in this script
+}
+function processRollback() {
+ // NOOP - intentionally empty; only processAdd does any work in this script
+}
+function processMergeIndexes() {
+ // NOOP - intentionally empty; only processAdd does any work in this script
+}
+function finish() {
+ // NOOP - intentionally empty; only processAdd does any work in this script
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
new file mode 100644
index 00000000000..6d276c33a16
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
@@ -0,0 +1,5 @@
+# simple AnalyzingInfix suggest phrase dictionary for testing
+Japanese Autocomplete and Japanese Highlighter broken
+Add Japanese Kanji number normalization to Kuromoji
+Add decompose compound Japanese Katakana token capability to Kuromoji
+This is just another entry!
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml
new file mode 100644
index 00000000000..d7aeeeb2331
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml
new file mode 100644
index 00000000000..af5d8fbb155
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ 8
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml
new file mode 100644
index 00000000000..16796361c66
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml
new file mode 100644
index 00000000000..3f8e224ce1b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml
new file mode 100644
index 00000000000..3575c438c72
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml
new file mode 100644
index 00000000000..9a704fdd731
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ pulsing1text
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml
new file mode 100644
index 00000000000..a71b361c956
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml
new file mode 100644
index 00000000000..6339ae25eab
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml
new file mode 100644
index 00000000000..1f92977760e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml
new file mode 100644
index 00000000000..a1b788e628e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml
new file mode 100644
index 00000000000..bd23933b270
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml
new file mode 100644
index 00000000000..84bfaea141d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml
new file mode 100644
index 00000000000..460fbda8ba2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml
new file mode 100644
index 00000000000..4272362a3f4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml
new file mode 100644
index 00000000000..34ef44bcc73
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml
new file mode 100644
index 00000000000..0e3595d75cb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml
new file mode 100644
index 00000000000..c372afd44a4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml
new file mode 100644
index 00000000000..e7874c88d25
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml
new file mode 100644
index 00000000000..5b32376751c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml
new file mode 100644
index 00000000000..ddc9f4dc685
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml
new file mode 100644
index 00000000000..fb3ddbe5c41
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml
new file mode 100644
index 00000000000..b3ca6ae3096
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml
new file mode 100644
index 00000000000..86e80a4555e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml
new file mode 100644
index 00000000000..06a689a8298
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml
@@ -0,0 +1,39 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml
new file mode 100644
index 00000000000..f7c4e9b2d80
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml
new file mode 100644
index 00000000000..774d58755f4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml
new file mode 100644
index 00000000000..d153793830a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml
new file mode 100644
index 00000000000..116f116a176
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml
new file mode 100644
index 00000000000..a776d105541
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sim1text
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml
new file mode 100644
index 00000000000..99028c18a7c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ 6.0
+ 1.5
+ 3.3
+ 7.7
+ 5.0
+ 5.0
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml
new file mode 100644
index 00000000000..cf34ec8e21b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ 6.0
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml
new file mode 100644
index 00000000000..61e18ad73c7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ 3.3
+
+ 5.0
+ 5.0
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml
new file mode 100644
index 00000000000..ef4e8042b3c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ 3
+
+ 0.5
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml
new file mode 100644
index 00000000000..bf1d53212e4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml
new file mode 100644
index 00000000000..81ce319eb86
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml
new file mode 100644
index 00000000000..026b529a942
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml
new file mode 100644
index 00000000000..5f4d69a31a7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml
new file mode 100644
index 00000000000..fc9e108bee3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ giberish
+ missleading.extension.updateprocessor.js.txt
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml
new file mode 100644
index 00000000000..dbadbb5c2c0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ javascript
+
+ currency.xml
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml
new file mode 100644
index 00000000000..a15c0ac1d6e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ false
+ schema.xml
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml
new file mode 100644
index 00000000000..4dee70ce08f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ a-file-name-that-does-not-exist.js
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml
new file mode 100644
index 00000000000..f13acb3f6b0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ true
+ false
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml
new file mode 100644
index 00000000000..4da2a002f40
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml
new file mode 100644
index 00000000000..00dd08c36fe
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+ true
+ false
+
+
+
+ ${useCompoundFile:false}
+ true
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml
new file mode 100644
index 00000000000..9fe2e89e037
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ false
+ schema.xml
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml
new file mode 100644
index 00000000000..d07cb0d1c11
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ false
+ managed-schema
+ bogusValue
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml
new file mode 100644
index 00000000000..9c9c96402ec
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+ false
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml
new file mode 100644
index 00000000000..ed07d9afdea
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+ ${unset.sys.property}
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt
new file mode 100644
index 00000000000..f4977b5df72
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the DictionaryCompound factory
+soft
+ball
+team
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js
new file mode 100644
index 00000000000..5ec9487c150
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js
@@ -0,0 +1,25 @@
+function processAdd(cmd) { // Conditionally tags the document carried by cmd; `req` is not defined in this script (presumably injected by the host script engine).
+ if (req.getParams().getBool("go-for-it",false)) { // opt-in request flag; the `false` argument is presumably the default used when the param is absent
+ cmd.getSolrInputDocument().addField("script_added_s", "i went for it"); // adds the literal marker field/value to the document
+ return true;
+ }
+ return false; // NOTE(review): how the host treats a false return is not visible here -- confirm against the script update processor docs
+}
+
+// // //
+
+function processDelete() {
+ // Intentionally empty: this script does no work in processDelete.
+}
+function processCommit() {
+ // Intentionally empty: this script does no work in processCommit.
+}
+function processRollback() {
+ // Intentionally empty: this script does no work in processRollback.
+}
+function processMergeIndexes() {
+ // Intentionally empty: this script does no work in processMergeIndexes.
+}
+function finish() {
+ // Intentionally empty: this script does no work in finish.
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml
new file mode 100644
index 00000000000..6a12b32b2a8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml
new file mode 100644
index 00000000000..2c8d203be68
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml
@@ -0,0 +1,1208 @@
+
+
+
+
+
+
+
+
+
+
+aA
+bB
+cC
+dD
+eE
+fF
+gG
+hH
+iI
+jJ
+kK
+lL
+mM
+nN
+oO
+pP
+qQ
+rR
+sS
+tT
+uU
+vV
+wW
+xX
+yY
+zZ
+æÆ
+øØ
+åÅ
+
+
+
+.ae3
+.an3k
+.an1s
+.be5la
+.be1t
+.bi4tr
+.der3i
+.diagno5
+.her3
+.hoved3
+.ne4t5
+.om1
+.ove4
+.po1
+.til3
+.yd5r
+ab5le
+3abst
+a3c
+ade5la
+5adg
+a1e
+5afg
+5a4f1l
+af3r
+af4ri
+5afs
+a4gef
+a4gi
+ag5in
+ag5si
+3agti
+a4gy
+a3h
+ais5t
+a3j
+a5ka
+a3ke
+a5kr
+aku5
+a3la
+a1le
+a1li
+al3k
+4alkv
+a1lo
+al5si
+a3lu
+a1ly
+am4pa
+3analy
+an4k5r
+a3nu
+3anv
+a5o
+a5pe
+a3pi
+a5po
+a1ra
+ar5af
+1arb
+a1re
+5arg
+a1ri
+a3ro
+a3sa
+a3sc
+a1si
+a3sk
+a3so
+3a3sp
+a3ste
+a3sti
+a1ta1
+a1te
+a1ti
+a4t5in
+a1to
+ato5v
+a5tr
+a1tu
+a5va
+a1ve
+a5z
+1ba
+ba4ti
+4bd
+1be
+be1k
+be3ro
+be5ru
+be1s4
+be1tr
+1bi
+bi5sk
+b1j
+4b1n
+1bo
+bo4gr
+bo3ra
+bo5re
+1br4
+4bs
+bs5k
+b3so
+b1st
+b5t
+3bu
+bu4s5tr
+b5w
+1by
+by5s
+4c1c
+1ce
+ce5ro
+3ch
+4ch.
+ci4o
+ck3
+5cy
+3da
+4d3af
+d5anta
+da4s
+d1b
+d1d4
+1de
+de5d
+4de4lem
+der5eri
+de4rig
+de5sk
+d1f
+d1g
+d3h
+1di
+di1e
+di5l
+d3j
+d1k
+d1l
+d1m
+4d1n
+3do
+4dop
+d5ov
+d1p
+4drett
+5d4reve
+3drif
+3driv
+d5ros
+d5ru
+ds5an
+ds5in
+d1ski
+d4sm
+d4su
+dsu5l
+ds5vi
+d3ta
+d1te
+dt5o
+d5tr
+dt5u
+1du
+dub5
+d1v
+3dy
+e5ad
+e3af
+e5ag
+e3ak
+e1al
+ea4la
+e3an
+e5ap
+e3at
+e3bl
+ebs3
+e1ci
+ed5ar
+edde4
+eddel5
+e4do
+ed5ra
+ed3re
+ed3rin
+ed4str
+e3e
+3eff
+e3fr
+3eft
+e3gu
+e1h
+e3in
+ei5s
+e3je
+e4j5el
+e1ka
+e3ke
+e3kl
+4e1ko
+e5kr
+ek5sa
+3eksem
+3eksp
+e3ku
+e1kv
+e5ky
+e3lad
+el3ak
+el3ar
+e1las
+e3le
+e4lek
+3elem
+e1li
+5elim
+e3lo
+el5sa
+e5lu
+e3ly
+e4mad
+em4p5le
+em1s
+en5ak
+e4nan
+4enn
+e4no
+en3so
+e5nu
+e5ol
+e3op
+e1or
+e3ov
+epi3
+e1pr
+e3ra
+er3af
+e4rag
+e4rak
+e1re
+e4ref
+er5ege
+5erhv
+e1ri
+e4rib
+er1k
+ero5d
+er5ov
+er3s
+er5tr
+e3rum
+er5un
+e5ry
+e1ta
+e1te
+etek4s
+e1ti
+e3tj
+e1to
+e3tr
+e3tu
+e1ty
+e3um
+e3un
+3eur
+e1va
+e3ve
+e4v3erf
+e1vi
+e5x
+1fa
+fa4ce
+fags3
+f1b
+f1d
+1fe
+fej4
+fejl1
+f1f
+f1g
+f1h
+1fi
+f1k
+3fl
+1fo
+for1en
+fo4ri
+f1p
+f1s4
+4ft
+f3ta
+f1te
+f1ti
+f5to
+f5tvi
+1fu
+f1v
+3fy
+1ga
+g3art
+g1b
+g1d
+1ge
+4g5enden
+ger3in
+ge3s
+g3f
+g1g
+g1h
+1gi
+gi4b
+gi3st
+5gj
+g3k
+g1l
+g1m
+3go
+4g5om
+g5ov
+g3p
+1gr
+gs1a
+gsde4len
+g4se
+gsha4
+g5sla
+gs3or
+gs1p
+g5s4tide
+g4str
+gs1v
+g3ta
+g1te
+g1ti
+g5to
+g3tr
+gt4s
+g3ud
+gun5
+g3v
+1gy
+g5yd
+4ha.
+heds3
+he5s
+4het
+hi4e
+hi4n5
+hi3s
+ho5ko
+ho5ve
+4h3t
+hun4
+hund3
+hvo4
+i1a
+i3b
+i4ble
+i1c
+i3dr
+ids5k
+i1el
+i1en
+i3er
+i3et.
+if3r
+i3gu
+i3h
+i5i
+i5j
+i1ka
+i1ke
+ik1l
+i5ko
+ik3re
+ik5ri
+iks5t
+ik4tu
+i3ku
+ik3v
+i3lag
+il3eg
+il5ej
+il5el
+i3li
+i4l5id
+il3k
+i1lo
+il5u
+i3mu
+ind3t
+5inf
+ings1
+in3s
+in4sv
+inter1
+i3nu
+i3od
+i3og
+i5ok
+i3ol
+ion4
+ions1
+i5o5r
+i3ot
+i5pi
+i3pli
+i5pr
+i3re
+i3ri
+ir5t
+i3sc
+i3si
+i4sm
+is3p
+i1ster
+i3sti
+i5sua
+i1ta
+i1te
+i1ti
+i3to
+i3tr
+it5re.
+i1tu
+i3ty
+i1u
+i1va
+i1ve
+i1vi
+j3ag
+jde4rer
+jds1
+jek4to
+4j5en.
+j5k
+j3le
+j3li
+jlmeld5
+jlmel4di
+j3r
+jre5
+ju3s
+5kap
+k5au
+5kav
+k5b
+kel5s
+ke3sk
+ke5st
+ke4t5a
+k3h
+ki3e
+ki3st
+k1k
+k5lak
+k1le
+3klu
+k4ny
+5kod
+1kon
+ko3ra
+3kort
+ko3v
+1kra
+5kry
+ks3an
+k1si
+ks3k
+ks1p
+k3ste
+k5stu
+ks5v
+k1t
+k4tar
+k4terh
+kti4e
+kt5re
+kt5s
+3kur
+1kus
+3kut
+k4vo
+k4vu
+5lab
+lad3r
+5lagd
+la4g3r
+5lam
+1lat
+l1b
+ldiagnos5
+l3dr
+ld3st
+1le.
+5led
+4lele
+le4mo
+3len
+1ler
+1les
+4leu
+l1f
+lfin4
+lfind5
+l1go1
+l3h
+li4ga
+4l5ins
+4l3int
+li5o
+l3j
+l1ke
+l1ko
+l3ky
+l1l
+l5mu
+lo4du
+l3op
+4l5or
+3lov
+4l3p
+l4ps
+l3r
+4ls
+lses1
+ls5in
+l5sj
+l1ta
+l4taf
+l1te
+l4t5erf
+l3ti
+lt3o
+l3tr
+l3tu
+lu5l
+l3ve
+l3vi
+1ma
+m1b
+m3d
+1me
+4m5ej
+m3f
+m1g
+m3h
+1mi
+mi3k
+m5ing
+mi4o
+mi5sty
+m3k
+m1l
+m1m
+mmen5
+m1n
+3mo
+mo4da
+4mop
+4m5ov
+m1pe
+m3pi
+m3pl
+m1po
+m3pr
+m1r
+mse5s
+ms5in
+m5sk
+ms3p
+m3ste
+ms5v
+m3ta
+m3te
+m3ti
+m3tr
+m1ud
+1mul
+mu1li
+3my
+3na
+4nak
+1nal
+n1b
+n1c
+4nd
+n3dr
+nd5si
+nd5sk
+nd5sp
+1ne
+ne5a
+ne4da
+nemen4
+nement5e
+neo4
+n3erk
+n5erl
+ne5sl
+ne5st
+n1f
+n4go
+4n1h
+1ni
+4nim
+ni5o
+ni3st
+n1ke
+n1ko
+n3kr
+n3ku
+n5kv
+4n1l
+n1m
+n1n
+1no
+n3ord
+n5p
+n3r
+4ns
+n3si
+n1sku
+ns3po
+n1sta
+n5sti
+n1ta
+nta4le
+n1te
+n1ti
+ntiali4
+n3to
+n1tr
+nt4s5t
+nt4su
+n3tu
+n3ty
+4n1v
+3ny
+n3z
+o3a
+o4as
+ob3li
+o1c
+o4din
+od5ri
+od5s
+od5un
+o1e
+of5r
+o4gek
+o4gel
+o4g5o
+og5re
+og5sk
+o5h
+o5in
+oi6s5e
+o1j
+o3ka
+o1ke
+o3ku
+o3la
+o3le
+o1li
+o1lo
+o3lu
+o5ly
+1omr
+on3k
+ook5
+o3or
+o5ov
+o3pi
+op3l
+op3r
+op3s
+3opta
+4or.
+or1an
+3ordn
+ord5s
+o3re.
+o3reg
+o3rek
+o3rer
+o3re3s
+o3ret
+o3ri
+3orient
+or5im
+o4r5in
+or3k
+or5o
+or3sl
+or3st
+o3si
+o3so
+o3t
+o1te
+o5un
+ov4s
+3pa
+pa5gh
+p5anl
+p3d
+4pec
+3pen
+1per
+pe1ra
+pe5s
+pe3u
+p3f
+4p5h
+1pla
+p4lan
+4ple.
+4pler
+4ples
+p3m
+p3n
+5pok
+4po3re
+3pot
+4p5p4
+p4ro
+1proc
+p3sk
+p5so
+ps4p
+p3st
+p1t
+1pu
+pu5b
+p5ule
+p5v
+5py3
+qu4
+4raf
+ra5is
+4rarb
+r1b
+r4d5ar
+r3dr
+rd4s3
+4reks
+1rel
+re5la
+r5enss
+5rese
+re5spo
+4ress
+re3st
+re5s4u
+5rett
+r1f
+r1gu
+r1h
+ri1e
+ri5la
+4rimo
+r4ing
+ringse4
+ringso4r
+4rinp
+4rint
+r3ka
+r1ke
+r1ki
+rk3so
+r3ku
+r1l
+rmo4
+r5mu
+r1n
+ro1b
+ro3p
+r3or
+r3p
+r1r
+rre5s
+rro4n5
+r1sa
+r1si
+r5skr
+r4sk5v
+rs4n
+r3sp
+r5stu
+r5su
+r3sv
+r5tal
+r1te
+r4teli
+r1ti
+r3to
+r4t5or
+rt5rat
+rt3re
+r5tri
+r5tro
+rt3s
+r5ty
+r3ud
+run4da
+5rut
+r3va
+r1ve
+r3vi
+ry4s
+s3af
+1sam
+sa4ma
+s3ap
+s1ar
+1sat
+4s1b
+s1d
+sdy4
+1se
+s4ed
+5s4er
+se4se
+s1f
+4s1g4
+4s3h
+si4bl
+1sig
+s5int
+5sis
+5sit
+5siu
+s5ju
+4sk.
+1skab
+1ske
+s3kl
+sk5s4
+5sky
+s1le
+s1li
+slo3
+5slu
+s5ly
+s1m
+s4my
+4snin
+s4nit
+so5k
+5sol
+5som.
+3somm
+s5oms
+5somt
+3son
+4s1op
+sp4
+3spec
+4sper
+3s4pi
+s1pl
+3sprog.
+s5r4
+s1s4
+4st.
+5s4tam
+1stan
+st5as
+3stat
+1stav
+1ste.
+1sted
+3stel
+5stemo
+1sten
+5step
+3ster.
+3stes
+5stet
+5stj
+3sto
+st5om
+1str
+s1ud
+3sul
+s3un
+3sur
+s3ve
+3s4y
+1sy1s
+5ta.
+1tag
+tands3
+4tanv
+4tb
+tede4l
+teds5
+3teg
+5tekn
+teo1
+5term
+te5ro
+4t1f
+6t3g
+t1h
+tialis5t
+3tid
+ti4en
+ti3st
+4t3k
+4t1l
+tli4s5
+t1m
+t1n
+to5ra
+to1re
+to1ri
+tor4m
+4t3p
+t4ra
+4tres
+tro5v
+1try
+4ts
+t3si
+ts4pa
+ts5pr
+t3st
+ts5ul
+4t1t
+t5uds
+5tur
+t5ve
+1typ
+u1a
+5udl
+ud5r
+ud3s
+3udv
+u1e
+ue4t5
+uge4ri
+ugs3
+u5gu
+u3i
+u5kl
+uk4ta
+uk4tr
+u1la
+u1le
+u5ly
+u5pe
+up5l
+u5q
+u3ra
+u3re
+u4r3eg
+u1rer
+u3ro
+us5a
+u3si
+u5ska
+u5so
+us5v
+u1te
+u1ti
+u1to
+ut5r
+ut5s4
+5u5v
+va5d
+3varm
+1ved
+ve4l5e
+ve4reg
+ve3s
+5vet
+v5h
+vi4l3in
+1vis
+v5j
+v5k
+vl4
+v3le
+v5li
+vls1
+1vo
+4v5om
+v5p
+v5re
+v3st
+v5su
+v5t
+3vu
+y3a
+y5dr
+y3e
+y3ke
+y5ki
+yk3li
+y3ko
+yk4s5
+y3kv
+y5li
+y5lo
+y5mu
+yns5
+y5o
+y1pe
+y3pi
+y3re
+yr3ek
+y3ri
+y3si
+y3ti
+y5t3r
+y5ve
+zi5o
+
+.så3
+.ær5i
+.øv3r
+a3tø
+a5væ
+brød3
+5bæ
+5drøv
+dstå4
+3dæ
+3dø
+e3læ
+e3lø
+e3rø
+er5øn
+e5tæ
+e5tø
+e1væ
+e3æ
+e5å
+3fæ
+3fø
+fø4r5en
+giø4
+g4sø
+g5så
+3gæ
+3gø1
+3gå
+i5tæ
+i3ø
+3kø
+3kå
+lingeniø4
+l3væ
+5løs
+m5tå
+1mæ
+3mø
+3må
+n3kæ
+n5tæ
+3næ
+4n5æb
+5nø
+o5læ
+or3ø
+o5å
+5præ
+5pæd
+på3
+r5kæ
+r5tæ
+r5tø
+r3væ
+r5æl
+4røn
+5rør
+3råd
+r5år
+s4kå
+3slå
+s4næ
+5stø
+1stå
+1sæ
+4s5æn
+1sø
+s5øk
+så4r5
+ti4ø
+3træk.
+t4sø
+t5så
+t3væ
+u3læ
+3værd
+1værk
+5vå
+y5væ
+æb3l
+æ3c
+æ3e
+æg5a
+æ4gek
+æ4g5r
+ægs5
+æ5i
+æ5kv
+ælle4
+æn1dr
+æ5o
+æ1re
+ær4g5r
+æ3ri
+ær4ma
+ær4mo
+ær5s
+æ5si
+æ3so
+æ3ste
+æ3ve
+øde5
+ø3e
+ø1je
+ø3ke
+ø3le
+øms5
+øn3st
+øn4t3
+ø1re
+ø3ri
+ørne3
+ør5o
+ø1ve
+å1d
+å1e
+å5h
+å3l
+å3re
+års5t
+å5sk
+å3t
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt
new file mode 100644
index 00000000000..9a14f40c5f9
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the HyphenationCompound factory,
+# in conjunction with the Danish hyphenation grammar.
+læse
+hest
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
new file mode 100644
index 00000000000..1befc5443e7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
@@ -0,0 +1,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt
new file mode 100644
index 00000000000..914161185f7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of articles for testing the French Elision filter.
+# Requiring a text file is a bit weird here...
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt
new file mode 100644
index 00000000000..94e2152160a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt
@@ -0,0 +1,4 @@
+# simple fuzzy suggest phrase dictionary for testing
+change 1.0
+charge 1.0
+chance 1.0
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff
new file mode 100644
index 00000000000..d035ad18001
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff
@@ -0,0 +1,13 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+SFX A Y 2
+SFX A 0 e n
+SFX A 0 e t
+
+SFX C Y 2
+SFX C 0 d/C c
+SFX C 0 c b
+
+PFX B Y 1
+PFX B 0 s o
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic
new file mode 100644
index 00000000000..92c35d2b6ab
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic
@@ -0,0 +1,6 @@
+5
+lucen/A
+lucene
+mahout/A
+olr/B
+ab/C
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd
new file mode 100644
index 00000000000..083c2bd8e80
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt
new file mode 100644
index 00000000000..6df149de61a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt
@@ -0,0 +1,5 @@
+# simple auto-suggest phrase dictionary for testing
+# note this uses tabs as separator!
+北海道	1.0
+今夜	3.0
+話した	6.0
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt
new file mode 100644
index 00000000000..8dfe80902d2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+foo
+bar
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt
new file mode 100644
index 00000000000..646b7ff4ddb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+junk
+more
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt
new file mode 100644
index 00000000000..ede7742581b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt
@@ -0,0 +1,246 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Syntax:
+# "source" => "target"
+# "source".length() > 0 (source cannot be empty.)
+# "target".length() >= 0 (target can be empty.)
+
+# example:
+# "À" => "A"
+# "\u00C0" => "A"
+# "\u00C0" => "\u0041"
+# "ß" => "ss"
+# "\t" => " "
+# "\n" => ""
+
+# À => A
+"\u00C0" => "A"
+
+# Á => A
+"\u00C1" => "A"
+
+# Â => A
+"\u00C2" => "A"
+
+# Ã => A
+"\u00C3" => "A"
+
+# Ä => A
+"\u00C4" => "A"
+
+# Å => A
+"\u00C5" => "A"
+
+# Æ => AE
+"\u00C6" => "AE"
+
+# Ç => C
+"\u00C7" => "C"
+
+# È => E
+"\u00C8" => "E"
+
+# É => E
+"\u00C9" => "E"
+
+# Ê => E
+"\u00CA" => "E"
+
+# Ë => E
+"\u00CB" => "E"
+
+# Ì => I
+"\u00CC" => "I"
+
+# Ã => I
+"\u00CD" => "I"
+
+# ÃŽ => I
+"\u00CE" => "I"
+
+# Ã => I
+"\u00CF" => "I"
+
+# IJ => IJ
+"\u0132" => "IJ"
+
+# Ð => D
+"\u00D0" => "D"
+
+# Ñ => N
+"\u00D1" => "N"
+
+# Ò => O
+"\u00D2" => "O"
+
+# Ó => O
+"\u00D3" => "O"
+
+# Ô => O
+"\u00D4" => "O"
+
+# Õ => O
+"\u00D5" => "O"
+
+# Ö => O
+"\u00D6" => "O"
+
+# Ø => O
+"\u00D8" => "O"
+
+# Œ => OE
+"\u0152" => "OE"
+
+# Þ
+"\u00DE" => "TH"
+
+# Ù => U
+"\u00D9" => "U"
+
+# Ú => U
+"\u00DA" => "U"
+
+# Û => U
+"\u00DB" => "U"
+
+# Ü => U
+"\u00DC" => "U"
+
+# Ã => Y
+"\u00DD" => "Y"
+
+# Ÿ => Y
+"\u0178" => "Y"
+
+# à => a
+"\u00E0" => "a"
+
+# á => a
+"\u00E1" => "a"
+
+# â => a
+"\u00E2" => "a"
+
+# ã => a
+"\u00E3" => "a"
+
+# ä => a
+"\u00E4" => "a"
+
+# å => a
+"\u00E5" => "a"
+
+# æ => ae
+"\u00E6" => "ae"
+
+# ç => c
+"\u00E7" => "c"
+
+# è => e
+"\u00E8" => "e"
+
+# é => e
+"\u00E9" => "e"
+
+# ê => e
+"\u00EA" => "e"
+
+# ë => e
+"\u00EB" => "e"
+
+# ì => i
+"\u00EC" => "i"
+
+# Ã => i
+"\u00ED" => "i"
+
+# î => i
+"\u00EE" => "i"
+
+# ï => i
+"\u00EF" => "i"
+
+# ij => ij
+"\u0133" => "ij"
+
+# ð => d
+"\u00F0" => "d"
+
+# ñ => n
+"\u00F1" => "n"
+
+# ò => o
+"\u00F2" => "o"
+
+# ó => o
+"\u00F3" => "o"
+
+# ô => o
+"\u00F4" => "o"
+
+# õ => o
+"\u00F5" => "o"
+
+# ö => o
+"\u00F6" => "o"
+
+# ø => o
+"\u00F8" => "o"
+
+# Å“ => oe
+"\u0153" => "oe"
+
+# ß => ss
+"\u00DF" => "ss"
+
+# þ => th
+"\u00FE" => "th"
+
+# ù => u
+"\u00F9" => "u"
+
+# ú => u
+"\u00FA" => "u"
+
+# û => u
+"\u00FB" => "u"
+
+# ü => u
+"\u00FC" => "u"
+
+# ý => y
+"\u00FD" => "y"
+
+# ÿ => y
+"\u00FF" => "y"
+
+# ff => ff
+"\uFB00" => "ff"
+
+# ﬁ => fi
+"\uFB01" => "fi"
+
+# fl => fl
+"\uFB02" => "fl"
+
+# ffi => ffi
+"\uFB03" => "ffi"
+
+# ffl => ffl
+"\uFB04" => "ffl"
+
+# ſt => ft
+"\uFB05" => "ft"
+
+# st => st
+"\uFB06" => "st"
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js
new file mode 100644
index 00000000000..6e8728a0d77
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js
@@ -0,0 +1,3 @@
+function doSomeStuff() {
+ return "This script doesn't contain any update processor functions";
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt
new file mode 100644
index 00000000000..984e1d82f10
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt
@@ -0,0 +1,23 @@
+function processAdd(cmd) {
+ // Integer.valueOf is needed here to get a true Java object, because
+ // all javascript numbers are floating point (ie: java.lang.Double)
+ cmd.getSolrInputDocument().addField("script_added_i",
+ java.lang.Integer.valueOf(42));
+ cmd.getSolrInputDocument().addField("script_added_d", 42.3);
+
+}
+function processDelete() {
+ // NOOP
+}
+function processCommit() {
+ // NOOP
+}
+function processRollback() {
+ // NOOP
+}
+function processMergeIndexes() {
+ // NOOP
+}
+function finish() {
+ // NOOP
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt
new file mode 100644
index 00000000000..a7624f0597d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+a => aa
+b => b1 b2
+c => c1,c2
+a\=>a => b\=>b
+a\,a => b\,b
+foo,bar,baz
+
+Television,TV,Televisions
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json
new file mode 100644
index 00000000000..8fbc217f6e9
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json
@@ -0,0 +1,18 @@
+{
+ "disclaimer": "This data is not real, it was synthetically created to match currency.xml. It is modeled after the data format available from openexchangerates.org. See https://openexchangerates.org/documentation for details",
+ "license": "http://www.apache.org/licenses/LICENSE-2.0",
+ "timestamp": 1332070464,
+
+
+ "IMPORTANT NOTE": "In order for tests to work, this data must be kept in sync with ./currency.xml",
+
+
+ "base": "USD",
+ "rates": {
+ "USD": 1,
+ "JPY": 81.29,
+ "EUR": 2.5,
+ "GBP": 0.5,
+ "MXN": 2.0
+ }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt
new file mode 100644
index 00000000000..fd4984d70b8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt
@@ -0,0 +1,8 @@
+# simple auto-suggest phrase dictionary for testing
+# note this uses tabs as separator!
+the first phrase 1.0
+the second phrase 2.0
+testing 1234 3.0
+foo 5.0
+the fifth phrase 2.0
+the final phrase 4.0
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt
new file mode 100644
index 00000000000..ab7e3e2470e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#use a protected word file to avoid stemming two
+#unrelated words to the same base word.
+#to test, we will use words that would normally obviously be stemmed.
+cats
+ridding
+c#
+c++
+.net
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt
new file mode 100644
index 00000000000..1dc0537c72b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt
@@ -0,0 +1,10 @@
+# Sample config file for RegexBoostProcessor
+# This example applies boost on the "url" field to boost or deboost certain urls
+# All rules are evaluated, and if several of them match, the boosts are multiplied.
+# If for example one rule with boost 2.0 and one rule with boost 0.1 match, the resulting urlboost=0.2
+
+https?://[^/]+/old/.* 0.1 #Comments are removed
+https?://[^/]+/.*index\([0-9]\).html$ 0.5
+
+# Prioritize certain sites over others
+https?://www.mydomain.no/.* 1.5
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml
new file mode 100644
index 00000000000..2b59472f5f0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml
new file mode 100644
index 00000000000..20b5a3533b9
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml
@@ -0,0 +1,121 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml
new file mode 100644
index 00000000000..1f9312e61d0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml
@@ -0,0 +1,97 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml
new file mode 100644
index 00000000000..54bdc0566aa
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1.2
+ 0.76
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml
new file mode 100644
index 00000000000..5eaab1f19e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ content
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml
new file mode 100644
index 00000000000..46a1321260c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml
new file mode 100644
index 00000000000..7feb73a3015
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml
@@ -0,0 +1,62 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
new file mode 100644
index 00000000000..3ab7837284f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
@@ -0,0 +1,482 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml
new file mode 100644
index 00000000000..c4f7d8331dd
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml
@@ -0,0 +1,70 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ I(F)
+ B
+ H2
+
+
+
+
+
+
+
+ I(F)
+ B
+ H3
+ 900
+
+
+
+
+
+
+
+ P
+ L
+ H2
+ 7
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml
new file mode 100644
index 00000000000..63d87997402
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml
@@ -0,0 +1,74 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
new file mode 100755
index 00000000000..0e3116d0797
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ id
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml
new file mode 100644
index 00000000000..3e39c2c40ac
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml
@@ -0,0 +1,88 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml
new file mode 100644
index 00000000000..6d58feda4e5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml
@@ -0,0 +1,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml
new file mode 100644
index 00000000000..60cab4f8601
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml
new file mode 100644
index 00000000000..c2a0e60f3ed
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml
@@ -0,0 +1,266 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ content
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml
new file mode 100644
index 00000000000..3d55b2ac70b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml
@@ -0,0 +1,58 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ SPL
+ DF
+ H2
+
+
+
+
+
+
+
+ LL
+ TTF
+ H3
+ 900
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml
new file mode 100644
index 00000000000..9f5059f26c1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml
new file mode 100644
index 00000000000..f39922f7c45
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1000
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml
new file mode 100644
index 00000000000..49b692e8d90
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0.4
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
new file mode 100644
index 00000000000..3bb2b491b3b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml
@@ -0,0 +1,58 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml
new file mode 100644
index 00000000000..9e2f9471026
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml
new file mode 100644
index 00000000000..b3869812375
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ subject
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml
new file mode 100644
index 00000000000..d00545ed102
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml
new file mode 100644
index 00000000000..783ae77c958
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ str
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml
new file mode 100644
index 00000000000..035f975d6b2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml
new file mode 100644
index 00000000000..f5ed9155e66
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml
@@ -0,0 +1,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
new file mode 100644
index 00000000000..e58b2e82eaf
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml
new file mode 100644
index 00000000000..fe123dfa6d0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml
new file mode 100644
index 00000000000..a2409459aa7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml
new file mode 100644
index 00000000000..8dea7914549
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml
@@ -0,0 +1,436 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml
new file mode 100644
index 00000000000..15caf81c67d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml
new file mode 100755
index 00000000000..a735e434bc7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml
@@ -0,0 +1,624 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml
new file mode 100644
index 00000000000..40fc0e8e2f5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml
@@ -0,0 +1,88 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ one
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml
new file mode 100644
index 00000000000..ca2bd788b38
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ is there an echo?
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sim1text
+ id
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml
new file mode 100644
index 00000000000..9e0d29f3e20
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml
new file mode 100644
index 00000000000..bfbd3334204
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl
new file mode 100644
index 00000000000..fe9fd6d7a7b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl
@@ -0,0 +1,19 @@
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml
new file mode 100644
index 00000000000..d1ca1f701cd
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml
new file mode 100644
index 00000000000..7124065626d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml
@@ -0,0 +1,87 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+ text
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml
new file mode 100644
index 00000000000..831539ee8be
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml
@@ -0,0 +1,64 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ one
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml
new file mode 100644
index 00000000000..350e2e90851
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml
@@ -0,0 +1,76 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 6.0
+ 1.5
+
+ 3
+ 5
+ 0.5
+
+
+
+
+
+
+
+ 3.3
+ 7.7
+ 2.718281828459045
+ 5.0
+
+ 1
+ 5
+ 0.2
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml
new file mode 100644
index 00000000000..0906a13bfb5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml
new file mode 100644
index 00000000000..eacea9009a8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml
new file mode 100644
index 00000000000..08e0aebc42f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml
new file mode 100644
index 00000000000..1819bfa9020
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml
@@ -0,0 +1,332 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+ text
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml
new file mode 100644
index 00000000000..94194df6192
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml
@@ -0,0 +1,30 @@
+
+
+]>
+
+
+
+
+
+ &schema_entity_include;
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xml
new file mode 100644
index 00000000000..a22844de0c4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xml
@@ -0,0 +1,712 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ I am your default sim
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml
new file mode 100755
index 00000000000..a993cbd6f61
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml
@@ -0,0 +1,387 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+ text
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xml
new file mode 100755
index 00000000000..506e08d787a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xml
@@ -0,0 +1,618 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xml
new file mode 100755
index 00000000000..b05e1a7ce9e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xml
@@ -0,0 +1,604 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml
new file mode 100644
index 00000000000..4e49dce953e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ string_f
+ string_f
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml
new file mode 100644
index 00000000000..04e90e33678
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml
@@ -0,0 +1,608 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+ id
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml
new file mode 100644
index 00000000000..1fabd5c202f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml
new file mode 100644
index 00000000000..9a59d90820a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ true
+ managed-schema
+
+
+
+
+ text
+
+ java.lang.Boolean
+ boolean
+
+
+ java.lang.Integer
+ tint
+
+
+ java.lang.Float
+ tfloat
+
+
+ java.util.Date
+ tdate
+
+
+ java.lang.Long
+ java.lang.Integer
+ tlong
+
+
+
+ java.lang.Double
+ java.lang.Float
+
+ tdouble
+
+
+
+
+
+
+ text
+
+ java.lang.Boolean
+ boolean
+
+
+ java.lang.Integer
+ tint
+
+
+ java.lang.Float
+ tfloat
+
+
+ java.util.Date
+ tdate
+
+
+ java.lang.Long
+ java.lang.Integer
+ tlong
+
+
+ java.lang.Number
+ tdouble
+
+
+
+
+
+
+
+
+
+
+
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+ yyyy-MM-dd'T'HH:mm:ss,SSSZ
+ yyyy-MM-dd'T'HH:mm:ss.SSS
+ yyyy-MM-dd'T'HH:mm:ss,SSS
+ yyyy-MM-dd'T'HH:mm:ssZ
+ yyyy-MM-dd'T'HH:mm:ss
+ yyyy-MM-dd'T'HH:mmZ
+ yyyy-MM-dd'T'HH:mm
+ yyyy-MM-dd HH:mm:ss.SSSZ
+ yyyy-MM-dd HH:mm:ss,SSSZ
+ yyyy-MM-dd HH:mm:ss.SSS
+ yyyy-MM-dd HH:mm:ss,SSS
+ yyyy-MM-dd HH:mm:ssZ
+ yyyy-MM-dd HH:mm:ss
+ yyyy-MM-dd HH:mmZ
+ yyyy-MM-dd HH:mm
+ yyyy-MM-dd
+
+
+
+ text
+
+ java.lang.Boolean
+ boolean
+
+
+ java.lang.Integer
+ tint
+
+
+ java.lang.Float
+ tfloat
+
+
+ java.util.Date
+ tdate
+
+
+ java.lang.Long
+ java.lang.Integer
+ tlong
+
+
+ java.lang.Number
+ tdouble
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml
new file mode 100755
index 00000000000..3105baf5157
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml
@@ -0,0 +1,26 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml
new file mode 100644
index 00000000000..03963023ae1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml
new file mode 100644
index 00000000000..0de6f9412f7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml
@@ -0,0 +1,39 @@
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml
new file mode 100644
index 00000000000..b5501d85508
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+ true
+
+ component1
+
+
+ component2
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+ foo
+
+
+ bar
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml
new file mode 100644
index 00000000000..fe39eef6a3e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml
new file mode 100644
index 00000000000..5cd0e7edf1a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml
@@ -0,0 +1,51 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+ ${useCompoundFile:false}
+
+ ${solr.tests.maxBufferedDocs}
+ ${solr.tests.maxIndexingThreads}
+ ${solr.tests.ramBufferSizeMB}
+
+
+
+ single
+
+
+
+ true
+ 3
+ 100MILLISECONDS
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml
new file mode 100644
index 00000000000..9925a1e1b69
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml
@@ -0,0 +1,48 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+ ${useCompoundFile:false}
+
+ ${solr.tests.maxBufferedDocs}
+ ${solr.tests.maxIndexingThreads}
+ ${solr.tests.ramBufferSizeMB}
+
+
+
+ single
+
+
+ value1
+ value2
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml
new file mode 100644
index 00000000000..b7dc855a0c5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml
@@ -0,0 +1,178 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+ true
+
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+ string
+ ${elevate.file:elevate.xml}
+
+
+
+
+
+ string
+ ${elevate.data.file:elevate-data.xml}
+
+
+
+
+ explicit
+
+
+ elevate
+
+
+
+
+
+ explicit
+
+
+ dataElevate
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+ prefix-${solr.test.sys.prop2}-suffix
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml
new file mode 100755
index 00000000000..1a1a4ffca62
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0.0
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml
new file mode 100644
index 00000000000..7d55cc2adef
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml
@@ -0,0 +1,60 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+ 70
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml
new file mode 100644
index 00000000000..a54168c38cd
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml
@@ -0,0 +1,79 @@
+
+
+
+
+
+
+ LUCENE_41
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 20
+ 20
+
+ true
+
+ 1
+
+
+
+
+
+
+
+
+
+
+
+ all
+ text
+ ${solr.core.name}
+ ${solr.core.dataDir}
+ ${solr.core.config}
+ ${solr.core.schema}
+ ${solr.core.transient}
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml
new file mode 100644
index 00000000000..066f8632e96
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml
@@ -0,0 +1,30 @@
+
+
+
+
+ ${solr.data.dir:}
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+ ${useCompoundFile:false}
+ 123
+ true
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml
new file mode 100644
index 00000000000..722f5e42265
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml
@@ -0,0 +1,27 @@
+
+
+
+
+ ${solr.data.dir:}
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+ true
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml
new file mode 100644
index 00000000000..0636a1dcfac
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml
new file mode 100644
index 00000000000..371bfb5638d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ -1
+ -1
+ -1
+
+ 11
+ 456
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml
new file mode 100644
index 00000000000..fc49a7b1c8c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml
@@ -0,0 +1,51 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ ${managed.schema.mutable}
+ managed-schema
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ true
+
+
+
+
+ true
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml
new file mode 100644
index 00000000000..9118bef45f0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml
@@ -0,0 +1,72 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ true
+
+
+
+
+ commit
+
+ schema.xml,xslt/dummy.xsl
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml
new file mode 100644
index 00000000000..30b4e3b7cb6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml
@@ -0,0 +1,49 @@
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+ commit
+ schema-replication2.xml:schema.xml
+
+ 1
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml
new file mode 100644
index 00000000000..2e9885f4478
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml
@@ -0,0 +1,69 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ commit
+ schema-replication2.xml:schema.xml
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml
new file mode 100644
index 00000000000..21d38a3af94
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml
@@ -0,0 +1,69 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ startup
+ schema.xml
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml
new file mode 100644
index 00000000000..b19073ba0ef
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml
@@ -0,0 +1,70 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ commit
+ startup
+ schema.xml
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml
new file mode 100644
index 00000000000..9d2a99aff4d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml
new file mode 100644
index 00000000000..00c77ae5e78
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ 7
+ ${useCompoundFile:false}
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml
new file mode 100644
index 00000000000..78a4eb711d3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+ LUCENE_41
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 20
+ 20
+
+ true
+
+ 1
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml
new file mode 100644
index 00000000000..ee27d0c49de
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml
@@ -0,0 +1,41 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml
new file mode 100644
index 00000000000..4537724b433
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml
new file mode 100644
index 00000000000..3c41f507158
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml
@@ -0,0 +1,230 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+
+
+
+
+
+
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+
+
+
+
+
+ false
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+
+
+
+
+
+ solr.DateField
+ solr.TrieDateField
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+
+
+
+
+
+ America/New_York
+ en_US
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+ yyyy-MM-dd'T'HH:mm:ss.SSS
+
+
+
+
+
+
+ America/Los_Angeles
+
+ MM/dd/yyyy
+
+
+
+
+
+
+ UTC
+ en_US
+
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+ yyyy-MM-dd'T'HH:mm:ss,SSSZ
+ yyyy-MM-dd'T'HH:mm:ss.SSS
+ yyyy-MM-dd'T'HH:mm:ss,SSS
+ yyyy-MM-dd'T'HH:mm:ssZ
+ yyyy-MM-dd'T'HH:mm:ss
+ yyyy-MM-dd'T'HH:mmZ
+ yyyy-MM-dd'T'HH:mm
+ yyyy-MM-dd HH:mm:ss.SSSZ
+ yyyy-MM-dd HH:mm:ss,SSSZ
+ yyyy-MM-dd HH:mm:ss.SSS
+ yyyy-MM-dd HH:mm:ss,SSS
+ yyyy-MM-dd HH:mm:ssZ
+ yyyy-MM-dd HH:mm:ss
+ yyyy-MM-dd HH:mmZ
+ yyyy-MM-dd HH:mm
+ yyyy-MM-dd hh:mm a
+ yyyy-MM-dd hh:mma
+ yyyy-MM-dd
+ EEE MMM dd HH:mm:ss Z yyyy
+ EEE MMM dd HH:mm:ss yyyy Z
+ EEE MMM dd HH:mm:ss yyyy
+ EEE, dd MMM yyyy HH:mm:ss Z
+ EEEE, dd-MMM-yy HH:mm:ss Z
+ EEEE, MMMM dd, yyyy
+ MMMM dd, yyyy
+ MMM. dd, yyyy
+
+
+
+
+
+
+ UTC
+ fr
+ 'le' EEEE dd MMMM yyyy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ru_RU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ru_RU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fr_FR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fr_FR
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+ true
+ YES
+ on
+
+
+ false
+ no
+ oFF
+
+
+
+
+
+
+ yup
+ nope
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yyyy-MM-dd
+ yyyy-MM-dd'T'HH:mm:ss.SSSZ
+ yyyy-MM-dd'T'HH:mm
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
new file mode 100644
index 00000000000..b4f560ed32f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
@@ -0,0 +1,272 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+ suggest_wfst
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.WFSTLookupFactory
+ suggest_wfst
+ false
+
+
+ true
+
+ phrasesuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ suggest_analyzing
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.AnalyzingLookupFactory
+ suggest_analyzing
+ false
+
+
+ true
+ ja_suggest
+ false
+
+ jasuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ infix_suggest_analyzing
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory
+ false
+
+
+ text
+
+ analyzingInfixSuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ fuzzy_suggest_analyzing
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory
+ fuzzy_suggest_analyzing
+ false
+
+
+ true
+ text
+ false
+
+ fuzzysuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ fuzzy_suggest_analyzing_with_max_edit_2
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory
+ fuzzy_suggest_analyzing_with_max_edit_2
+ false
+
+
+ true
+ text
+ false
+ 2
+
+ fuzzysuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory
+ fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4
+ false
+
+
+ true
+ text
+ false
+ 4
+
+ fuzzysuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+ fuzzy_suggest_analyzing_with_min_fuzzy_length_2
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory
+ fuzzy_suggest_analyzing_with_min_fuzzy_length_2
+ false
+
+
+ true
+ text
+ false
+ 2
+
+ fuzzysuggest.txt
+
+
+
+ phrase_suggest
+
+
+
+
+
+
+
+
+ true
+ suggest_wfst
+ false
+
+ true
+
+
+ suggest_wfst
+
+
+
+
+
+
+ true
+ suggest_analyzing
+ false
+
+ true
+
+
+ suggest_analyzing
+
+
+
+
+
+
+ true
+ infix_suggest_analyzing
+ false
+
+ true
+
+
+ infix_suggest_analyzing
+
+
+
+
+
+ true
+ fuzzy_suggest_analyzing
+ false
+
+ true
+
+
+ fuzzy_suggest_analyzing
+
+
+
+
+
+
+ true
+ fuzzy_suggest_analyzing_with_max_edit_2
+ false
+
+ true
+
+
+ fuzzy_suggest_analyzing_with_max_edit_2
+
+
+
+
+
+
+ true
+ fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4
+ false
+
+ true
+
+
+ fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4
+
+
+
+
+
+
+ true
+ fuzzy_suggest_analyzing_with_min_fuzzy_length_2
+ false
+
+ true
+
+
+ fuzzy_suggest_analyzing_with_min_fuzzy_length_2
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml
new file mode 100644
index 00000000000..c3d9d544e1f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+ false
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml
new file mode 100644
index 00000000000..af6cc75112d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml
@@ -0,0 +1,74 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml
new file mode 100644
index 00000000000..12252c06b6f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml
@@ -0,0 +1,70 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solr 0 10 mock
+ rocks 0 10 mock
+
+
+
+
+
+
+
+ fast_warm 0 10
+ mock
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml
new file mode 100644
index 00000000000..5ec8e5920b3
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ commit
+ schema.xml
+
+
+ http://127.0.0.1:TEST_PORT/solr/replication
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl
new file mode 100644
index 00000000000..03f236fccf7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl
@@ -0,0 +1,5 @@
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml
new file mode 100644
index 00000000000..859883d52f0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml
@@ -0,0 +1,54 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+ dismax
+
+
+ responselog
+
+
+
+
+
+ dismax
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml
new file mode 100644
index 00000000000..43fbc2873da
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml
@@ -0,0 +1,112 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+ javascript
+ missleading.extension.updateprocessor.js.txt
+
+
+
+
+
+
+
+
+
+
+
+ trivial.updateprocessor0.js
+
+ true
+ 1
+
+
+
+
+
+
+
+
+ trivial.updateprocessor0.js
+ trivial.updateprocessor1.js
+
+
+ true
+ 1
+
+
+
+
+
+
+
+ trivial.updateprocessor0.js
+ trivial.updateprocessor1.js
+
+ true
+ 1
+
+
+
+
+
+
+
+
+ conditional.updateprocessor.js
+ addfields.updateprocessor.js
+
+
+
+
+
+
+ conditional.updateprocessor.js
+
+
+ addfields.updateprocessor.js
+
+
+
+
+
+ throw.error.on.add.updateprocessor.js
+
+
+
+
+ missing.functions.updateprocessor.js
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml
new file mode 100644
index 00000000000..ac2e59ee56e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml
@@ -0,0 +1,61 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ http://127.0.0.1:TEST_PORT/solr
+ 00:00:01
+ COMPRESSION
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml
new file mode 100644
index 00000000000..36d6d92e146
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml
@@ -0,0 +1,57 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml
new file mode 100644
index 00000000000..8c76857f32b
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml
@@ -0,0 +1,6 @@
+
+
+ field-included
+ x
+ x_x
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml
new file mode 100644
index 00000000000..3a1547f1b1c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ ${foo.foo1}
+ ${foo.foo2}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml
new file mode 100644
index 00000000000..9092a5875a8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml
@@ -0,0 +1,178 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+ lowerpunctfilt
+
+
+ default
+ lowerfilt
+ spellchecker1
+ true
+
+
+ default_teststop
+ default_teststop
+ true
+ teststop
+
+
+ direct
+ solr.DirectSolrSpellChecker
+ 3
+ 100
+ teststop
+
+
+ direct_lowerfilt
+ solr.DirectSolrSpellChecker
+ 3
+ 100
+ lowerfilt
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ lowerfilt
+ true
+ true
+ MAX_FREQ
+ 10
+
+
+ threshold
+ lowerfilt
+ spellcheckerThreshold
+ true
+ .29
+
+
+ threshold_direct
+ solr.DirectSolrSpellChecker
+ lowerfilt
+ spellcheckerThreshold
+ true
+ .29
+
+
+ multipleFields
+ lowerfilt1and2
+ spellcheckerMultipleFields
+ true
+
+
+
+ jarowinkler
+ lowerfilt
+
+ org.apache.lucene.search.spell.JaroWinklerDistance
+ spellchecker2
+
+
+
+ solr.FileBasedSpellChecker
+ external
+ spellings.txt
+ UTF-8
+ spellchecker3
+
+
+
+ freq
+ lowerfilt
+ spellcheckerFreq
+
+ freq
+ true
+
+
+ fqcn
+ lowerfilt
+ spellcheckerFQCN
+ org.apache.solr.spelling.SampleComparator
+ true
+
+
+ perDict
+ org.apache.solr.handler.component.DummyCustomParamSpellChecker
+ lowerfilt
+
+
+
+
+
+
+
+
+
+ false
+
+ false
+
+ 1
+
+
+ spellcheck
+
+
+
+
+ dismax
+ lowerfilt1^1
+
+
+ spellcheck
+
+
+
+
+ default
+ wordbreak
+ 20
+
+
+ spellcheck
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml
new file mode 100644
index 00000000000..e6744cb3944
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml
@@ -0,0 +1,142 @@
+
+
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+ suggest
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.jaspell.JaspellLookup
+ suggest
+ suggest
+ true
+
+
+ 0.0
+
+
+
+
+
+
+ suggest_tst
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.tst.TSTLookup
+ suggest
+ suggest_tst
+ true
+
+
+ 0.0
+
+
+
+
+
+
+ suggest_fst
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.FSTLookup
+ suggest
+ suggest_fst
+ true
+
+
+ 5
+ true
+
+
+
+
+
+
+ suggest_wfst
+ org.apache.solr.spelling.suggest.Suggester
+ org.apache.solr.spelling.suggest.fst.WFSTLookupFactory
+ suggest
+ suggest_wfst
+ true
+
+
+ true
+
+
+
+
+
+
+ true
+ suggest
+ true
+
+
+ suggest_jaspell
+
+
+
+
+
+
+ true
+ suggest_tst
+ true
+
+
+ suggest_tst
+
+
+
+
+
+
+ true
+ suggest_fst
+ false
+
+
+ suggest_fst
+
+
+
+
+
+
+ true
+ suggest_wfst
+ false
+
+
+ suggest_wfst
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml
new file mode 100644
index 00000000000..fdca7893d92
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+
+
+
+
+
+
+ prefix-${solr.test.sys.prop2}-suffix
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml
new file mode 100644
index 00000000000..86a79fbf8fc
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml
@@ -0,0 +1,47 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ 7
+
+ 19
+ 9
+ 0.1
+
+
+ ${useCompoundFile:false}
+
+
+
+ 987
+ 42
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml
new file mode 100644
index 00000000000..d55845c13d0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml
@@ -0,0 +1,120 @@
+
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+ ${solr.hdfs.blockcache.enabled:true}
+ ${solr.hdfs.blockcache.blocksperbank:1024}
+ ${solr.hdfs.home:}
+ ${solr.hdfs.confdir:}
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+
+ true
+ true
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+
+ true
+ non_indexed_signature_sS
+ false
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml
new file mode 100644
index 00000000000..ecaaf1146d5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml
@@ -0,0 +1,84 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+ x1
+ x2
+
+
+
+ 100
+
+
+
+ x1
+ x2
+
+
+
+
+ xA
+ xA
+
+
+
+
+
+
+
+ 88
+ 99
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
new file mode 100644
index 00000000000..1b99f61dc36
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml
@@ -0,0 +1,464 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+ solr.TrieIntField
+ solr.TrieLongField
+
+
+
+ min_foo_l
+
+
+ max_foo_l
+
+
+ ;
+
+ primary_author_s1
+
+
+
+ primary_author_s1
+ first_foo_l
+
+
+
+
+
+
+
+
+
+
+
+
+ foo_t
+
+
+
+
+
+
+
+ foo_t
+
+
+
+
+
+ foo.*
+ bar.*
+
+ .*HOSS.*
+
+
+
+
+
+ foo.*
+ bar.*
+
+
+ solr.DateField
+
+
+ .*HOSS.*
+
+
+
+
+
+ foo.*
+ bar.*
+
+
+ solr.DateField
+ .*HOSS.*
+
+
+
+
+
+
+ name
+ foo_t
+
+
+
+
+
+ name
+ foo_t
+
+
+
+
+
+
+ foo.*
+ bar.*_s
+
+
+
+
+
+ nametext
+ text_sw
+
+
+
+
+
+ solr.DateField
+ solr.StrField
+
+
+
+
+
+ solr.DateField
+ solr.StrField
+
+ foo.*
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ foo.*
+ yak.*
+
+
+
+
+
+
+
+
+
+
+ foo_s
+
+
+
+
+ string
+ ;
+
+
+
+
+
+ foo_s
+ bar_s
+
+
+
+
+ foo_s
+ bar_s
+
+
+
+
+ foo_i
+ foo_s
+ bar_s
+
+
+
+
+ foo_i
+ foo_s
+ bar_s
+
+
+
+
+
+ html_s
+
+
+
+
+
+
+ trunc
+ 5
+
+
+
+
+
+ count_field
+
+
+
+
+
+
+
+
+
+ false
+
+
+
+
+
+ true
+
+
+
+
+
+ foo.*
+ false
+
+
+
+
+
+ foo.*
+
+ false
+
+
+
+
+
+
+ false
+
+
+
+
+
+ true
+
+
+
+
+
+ .*_raw
+
+
+
+
+
+ source1_s
+ dest_s
+
+
+
+
+ source1_s
+ source2_s
+ dest_s
+
+
+
+
+
+
+ source1_s
+ source2_s
+
+ dest_s
+
+
+
+
+
+
+ source\d_.*
+
+ source0_.*
+
+
+ dest_s
+
+
+
+
+
+ field1
+ toField
+
+
+ toField
+ 3
+
+
+
+
+
+ field1
+ toField
+
+
+ field1
+
+
+
+
+
+ toField
+
+
+ field1
+ toField
+
+
+
+
+
+ field1
+ field2
+ toField
+
+
+ ;
+ toField
+
+
+
+
+
+
+ category
+ category_s
+
+
+
+ authors
+ editors
+
+ contributors
+
+
+
+ .*_price
+
+ list_price
+
+
+ all_prices
+
+
+
+
+
+ category
+ category_count
+
+
+ category_count
+
+
+ category_count
+ 0
+
+
+
+
+
+ content
+ title
+ \s+
+ X
+
+
+
+
+
+ processor_default_s
+ X
+
+
+ processor_default_i
+ 42
+
+
+ uuid
+
+
+ timestamp
+
+
+
+
+
+ uniq_.*
+
+
+
+
+
+ subject
+ title
+ teststop
+ nonexistent
+ ssto
+ sind
+ simple
+
+
+
+
+
+
+ subject
+ title
+ teststop
+ nonexistent
+ ssto
+ sind
+ json
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml
new file mode 100644
index 00000000000..3f187f34d9d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+ ${useCompoundFile}
+ ${solr.tests.maxBufferedDocs}
+ ${solr.tests.maxIndexingThreads}
+ ${solr.tests.ramBufferSizeMB}
+
+ 1000
+ 10000
+ single
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml
new file mode 100644
index 00000000000..230a1ebf2f6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml
@@ -0,0 +1,35 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 00000000000..055f3d7faeb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+
+
+ ${useCompoundFile:false}
+
+ ${solr.tests.maxBufferedDocs}
+ ${solr.tests.maxIndexingThreads}
+ ${solr.tests.ramBufferSizeMB}
+
+
+ ${solr.tests.nrtMode:true}
+
+ 1000
+ 10000
+
+
+ ${solr.tests.lockType:single}
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml
new file mode 100644
index 00000000000..810aa1d312e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -0,0 +1,562 @@
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+ 1000000
+ 2000000
+ 3000000
+ 4000000
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+ ${solr.commitwithin.softcommit:true}
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ true
+
+
+
+
+
+ dismax
+ *:*
+ 0.01
+
+ text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
+
+
+ text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
+
+
+ ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
+
+
+ 3<-1 5<-2 6<90%
+
+ 100
+
+
+
+
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+ lowerpunctfilt
+
+
+ default
+ lowerfilt
+ spellchecker1
+ false
+
+
+ direct
+ DirectSolrSpellChecker
+ lowerfilt
+ 3
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ lowerfilt
+ true
+ true
+ 10
+
+
+ multipleFields
+ lowerfilt1and2
+ spellcheckerMultipleFields
+ false
+
+
+
+ jarowinkler
+ lowerfilt
+
+ org.apache.lucene.search.spell.JaroWinklerDistance
+ spellchecker2
+
+
+
+ solr.FileBasedSpellChecker
+ external
+ spellings.txt
+ UTF-8
+ spellchecker3
+
+
+
+ freq
+ lowerfilt
+ spellcheckerFreq
+
+ freq
+ false
+
+
+ fqcn
+ lowerfilt
+ spellcheckerFQCN
+ org.apache.solr.spelling.SampleComparator
+ false
+
+
+ perDict
+ org.apache.solr.handler.component.DummyCustomParamSpellChecker
+ lowerfilt
+
+
+
+
+
+
+
+ termsComp
+
+
+
+
+
+
+
+
+ false
+
+ false
+
+ 1
+
+
+ spellcheck
+
+
+
+
+ direct
+ false
+ false
+ 1
+
+
+ spellcheck
+
+
+
+
+ default
+ wordbreak
+ 20
+
+
+ spellcheck
+
+
+
+
+ direct
+ wordbreak
+ 20
+
+
+ spellcheck
+
+
+
+
+ dismax
+ lowerfilt1^1
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+ 70
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+ WORD
+ en
+ US
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+ prefix-${solr.test.sys.prop2}-suffix
+
+
+
+
+
+ false
+ true
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+ false
+ false
+ id
+
+ org.apache.solr.update.processor.Lookup3Signature
+
+
+
+
+
+
+ true
+ non_indexed_signature_sS
+ false
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+
+ uniq
+ uniq2
+ uniq3
+
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml
new file mode 100644
index 00000000000..c5cc04cfe9d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml
@@ -0,0 +1,25 @@
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml
new file mode 100755
index 00000000000..172fc953f37
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml
@@ -0,0 +1,76 @@
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 20
+ 200
+ false
+ 2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt
new file mode 100644
index 00000000000..f57a4ad490f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# test that we can override the stemming algorithm with our own mappings
+# these must be tab-separated
+monkeys monkey
+otters otter
+# some crazy ones that a stemmer would never do
+dogs cat
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt
new file mode 100644
index 00000000000..8dfe80902d2
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+foo
+bar
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt
new file mode 100644
index 00000000000..646b7ff4ddb
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+junk
+more
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt
new file mode 100644
index 00000000000..1c0c6f51142
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt
@@ -0,0 +1,10 @@
+ | This is a file in snowball format, empty lines are ignored, '|' is a comment
+ | Additionally, multiple words can be on the same line, allowing stopwords to be
+ | arranged in tables (useful in some languages where they might inflect)
+
+ | fictitious table below
+
+|third person singular
+|Subject Object Possessive Reflexive
+he him his himself| masculine
+she her hers herself| feminine
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt
new file mode 100644
index 00000000000..456348ea9dc
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt
new file mode 100644
index 00000000000..d8a3810c26c
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt
new file mode 100644
index 00000000000..eb5f6e1c0f8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt
@@ -0,0 +1 @@
+BOMsAreEvil
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt
new file mode 100644
index 00000000000..b5824da3263
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+#Standard English stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt
new file mode 100644
index 00000000000..0d305c88c59
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# stopwords in the wrong encoding (ISO-8859-1).
+# tests resourceloader's ability to report wrongly encoded files.
+bañadores
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt
new file mode 100644
index 00000000000..b0e31cb7ec8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,31 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js
new file mode 100644
index 00000000000..ca56fe35cfe
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/throw.error.on.add.updateprocessor.js
@@ -0,0 +1,21 @@
+function processAdd() {
+ throw "guess what? no-soup-fo-you !!!";
+}
+
+// // //
+
+function processDelete() {
+ // NOOP
+}
+function processCommit() {
+ // NOOP
+}
+function processRollback() {
+ // NOOP
+}
+function processMergeIndexes() {
+ // NOOP
+}
+function finish() {
+ // NOOP
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js
new file mode 100644
index 00000000000..b1856b15d85
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor0.js
@@ -0,0 +1,59 @@
+var Assert = Packages.org.junit.Assert;
+
+function processAdd(cmd) {
+ functionMessages.add("processAdd0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+ Assert.assertNotNull(cmd);
+ Assert.assertNotNull(params);
+ Assert.assertTrue(1 == params.get('intValue').intValue()); // had issues with assertTrue(1, params.get('intValue').intValue()) casting to wrong variant
+ Assert.assertTrue(params.get('boolValue').booleanValue());
+
+ // Integer.valueOf is needed here to get a true Java object, because
+ // all JavaScript numbers are floating point (i.e. java.lang.Double)
+ cmd.getSolrInputDocument().addField("script_added_i",
+ java.lang.Integer.valueOf(42));
+ cmd.getSolrInputDocument().addField("script_added_d", 42.3);
+
+}
+
+function processDelete(cmd) {
+ functionMessages.add("processDelete0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+ Assert.assertNotNull(cmd);
+}
+
+function processMergeIndexes(cmd) {
+ functionMessages.add("processMergeIndexes0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+ Assert.assertNotNull(cmd);
+}
+
+function processCommit(cmd) {
+ functionMessages.add("processCommit0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+ Assert.assertNotNull(cmd);
+}
+
+function processRollback(cmd) {
+ functionMessages.add("processRollback0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+ Assert.assertNotNull(cmd);
+}
+
+function finish() {
+ functionMessages.add("finish0");
+ Assert.assertNotNull(req);
+ Assert.assertNotNull(rsp);
+ Assert.assertNotNull(logger);
+}
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js
new file mode 100644
index 00000000000..98bdf2ab060
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/trivial.updateprocessor1.js
@@ -0,0 +1,25 @@
+function processAdd(cmd) {
+ functionMessages.add("processAdd1");
+
+}
+
+function processDelete(cmd) {
+ functionMessages.add("processDelete1");
+}
+
+function processMergeIndexes(cmd) {
+ functionMessages.add("processMergeIndexes1");
+}
+
+function processCommit(cmd) {
+ functionMessages.add("processCommit1");
+}
+
+function processRollback(cmd) {
+ functionMessages.add("processRollback1");
+}
+
+function finish() {
+ functionMessages.add("finish1");
+}
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt
new file mode 100644
index 00000000000..7378b0802e7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A customized type mapping for WordDelimiterFilterFactory
+# the allowable types are: LOWER, UPPER, ALPHA, DIGIT, ALPHANUM, SUBWORD_DELIM
+#
+# the default for any character without a mapping is always computed from
+# Unicode character properties
+
+# Map the $, %, '.', and ',' characters to DIGIT
+# This might be useful for financial data.
+$ => DIGIT
+% => DIGIT
+. => DIGIT
+\u002C => DIGIT
+
+# in some cases you might not want to split on ZWJ
+# this also tests the case where we need a bigger byte[]
+# see http://en.wikipedia.org/wiki/Zero-width_joiner
+\u200D => ALPHANUM
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl
new file mode 100644
index 00000000000..f10cfbf9330
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl
new file mode 100644
index 00000000000..fbbd8f745cd
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl
@@ -0,0 +1,39 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl
new file mode 100644
index 00000000000..2e7359a62b6
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README
new file mode 100644
index 00000000000..b7ca5b834f4
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README
@@ -0,0 +1,18 @@
+
+
+Items under this directory are used by TestConfig.testLibs()
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt
@@ -0,0 +1 @@
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties b/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties
new file mode 100644
index 00000000000..65df5e6114f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+schema=schema-tiny.xml
+config=solrconfig-minimal.xml
+transient=true
+loadOnStartup=false
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml
new file mode 100644
index 00000000000..55801c4faf1
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml
@@ -0,0 +1,59 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+ 0
+
+
+
+ 1024
+ true
+ 10
+
+
+
+
+
+
+
+ implicit
+
+
+
+
+
+
+
+
+
+
+ solr
+ solrconfig.xml schema.xml
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml
new file mode 100644
index 00000000000..a2216ddfa99
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml
@@ -0,0 +1,46 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ subject
+ id
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff b/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff
new file mode 100644
index 00000000000..a23f9b554bd
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff
@@ -0,0 +1,10 @@
+1=0.354
+2=0.975
+3=0.001
+4=100.35
+5=53.9
+6=70
+7=3.957
+8=1400
+9=24
+10=450
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml
new file mode 100644
index 00000000000..886e4434631
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml
@@ -0,0 +1,52 @@
+
+
+
+ testAdminHandler
+ 11
+ ${coreRootDirectory:testCoreRootDirectory}
+ testManagementPath
+ testSharedLib
+ ${shareSchema:testShareSchema}
+ 66
+
+
+ 22
+ 33
+ 55
+ testHost
+ testHostContext
+ ${hostPort:44}
+ 77
+ testZkHost
+
+
+
+ testLoggingClass
+ testLoggingEnabled
+
+ 88
+ 99
+
+
+
+
+ ${socketTimeout:100}
+ ${connTimeout:110}
+
+
+
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml
new file mode 100644
index 00000000000..abb308ec997
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml
@@ -0,0 +1,70 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml
new file mode 100644
index 00000000000..476b5bc7a10
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml
@@ -0,0 +1,39 @@
+
+
+
+
+
+ ${shareSchema:false}
+
+
+ 127.0.0.1
+ ${hostContext:solr}
+ ${hostPort:8983}
+ ${solr.zkclienttimeout:30000}
+ ${genericCoreNodeNames:true}
+ ${distribUpdateConnTimeout:15000}
+ ${distribUpdateSoTimeout:120000}
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml
new file mode 100644
index 00000000000..70aaa56faa0
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+ myMagicRequiredValue
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml
new file mode 100644
index 00000000000..f5d24fe931d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+ myMagicRequiredValue
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml
new file mode 100644
index 00000000000..3f8b213eab5
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+ 127.0.0.1
+ 8983
+ ${hostContext:solr}
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml
new file mode 100644
index 00000000000..6bc1c35e888
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml
@@ -0,0 +1,59 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml
new file mode 100644
index 00000000000..4604f60476f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+ ${socketTimeout:120000}
+ ${connTimeout:15000}
+
+
+
+
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt b/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt
new file mode 100644
index 00000000000..2d2472e340a
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt
@@ -0,0 +1,16 @@
+foo
+bar
+Solr
+junk
+foo
+bar
+Solr
+junk
+foo
+bar
+Solr
+junk
+foo
+bar
+Solr
+junk
\ No newline at end of file
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java
new file mode 100644
index 00000000000..e5e1d3cce67
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.impl.XMLResponseParser;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.util.ExternalPaths;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.cdk.morphline.api.Collector;
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Compiler;
+import com.cloudera.cdk.morphline.base.FaultTolerance;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.cloudera.cdk.morphline.stdlib.PipeBuilder;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.io.Files;
+import com.typesafe.config.Config;
+
+public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 {
+
+ protected Collector collector;
+ protected Command morphline;
+ protected SolrServer solrServer;
+ protected DocumentLoader testServer;
+
+ protected static final boolean TEST_WITH_EMBEDDED_SOLR_SERVER = true;
+ protected static final String EXTERNAL_SOLR_SERVER_URL = System.getProperty("externalSolrServer");
+// protected static final String EXTERNAL_SOLR_SERVER_URL = "http://127.0.0.1:8983/solr";
+
+ protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files";
+ protected static final String DEFAULT_BASE_DIR = "solr";
+ protected static final AtomicInteger SEQ_NUM = new AtomicInteger();
+ protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger();
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSolrMorphlineTestBase.class);
+
+ protected String tempDir;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ myInitCore(DEFAULT_BASE_DIR);
+ }
+
+ protected static void myInitCore(String baseDirName) throws Exception {
+ initCore(
+ RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml",
+ RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml",
+ RESOURCES_DIR + "/" + baseDirName
+ );
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ collector = new Collector();
+
+ if (EXTERNAL_SOLR_SERVER_URL != null) {
+ //solrServer = new ConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2);
+ //solrServer = new SafeConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2);
+ solrServer = new HttpSolrServer(EXTERNAL_SOLR_SERVER_URL);
+ ((HttpSolrServer)solrServer).setParser(new XMLResponseParser());
+ } else {
+ if (TEST_WITH_EMBEDDED_SOLR_SERVER) {
+ solrServer = new EmbeddedTestSolrServer(h.getCoreContainer(), "");
+ } else {
+ throw new RuntimeException("Not yet implemented");
+ //solrServer = new TestSolrServer(getSolrServer());
+ }
+ }
+
+ int batchSize = SEQ_NUM2.incrementAndGet() % 2 == 0 ? 100 : 1; //SolrInspector.DEFAULT_SOLR_SERVER_BATCH_SIZE : 1;
+ testServer = new SolrServerDocumentLoader(solrServer, batchSize);
+ deleteAllDocuments();
+
+ tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis();
+ new File(tempDir).mkdirs();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ collector = null;
+ solrServer = null;
+ super.tearDown();
+ }
+
+ protected void testDocumentTypesInternal(String[] files, Map expectedRecords) throws Exception {
+ deleteAllDocuments();
+ int numDocs = 0;
+ for (int i = 0; i < 1; i++) {
+
+ for (String file : files) {
+ File f = new File(file);
+ byte[] body = Files.toByteArray(f);
+ Record event = new Record();
+ //event.put(Fields.ID, docId++);
+ event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body));
+ event.getFields().put(Fields.ATTACHMENT_NAME, f.getName());
+ event.getFields().put(Fields.BASE_ID, f.getName());
+ load(event);
+ Integer count = expectedRecords.get(file);
+ if (count != null) {
+ numDocs += count;
+ } else {
+ numDocs++;
+ }
+ assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*"));
+ }
+ }
+ assertEquals(numDocs, queryResultSetSize("*:*"));
+ }
+
+ private boolean load(Record record) {
+ Notifications.notifyStartSession(morphline);
+ return morphline.process(record);
+ }
+
+ protected int queryResultSetSize(String query) {
+// return collector.getRecords().size();
+ try {
+ testServer.commitTransaction();
+ solrServer.commit(false, true, true);
+ QueryResponse rsp = solrServer.query(new SolrQuery(query).setRows(Integer.MAX_VALUE));
+ LOGGER.debug("rsp: {}", rsp);
+ int i = 0;
+ for (SolrDocument doc : rsp.getResults()) {
+ LOGGER.debug("rspDoc #{}: {}", i++, doc);
+ }
+ int size = rsp.getResults().size();
+ return size;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void deleteAllDocuments() throws SolrServerException, IOException {
+ collector.reset();
+ SolrServer s = solrServer;
+ s.deleteByQuery("*:*"); // delete everything!
+ s.commit();
+ }
+
+
+  public static void setupMorphline(String tempDir, String file) throws IOException {
+    String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8");
+    // String.replace is literal; replaceAll would interpret '\' and '$' in the path (e.g. Windows separators)
+    morphlineText = morphlineText.replace("RESOURCES_DIR", new File(tempDir).getAbsolutePath());
+    FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8");
+  }
+
+ protected Command createMorphline(String file) throws IOException {
+ setupMorphline(tempDir, file);
+
+ return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext());
+ }
+
+ private MorphlineContext createMorphlineContext() {
+ return new SolrMorphlineContext.Builder()
+ .setDocumentLoader(testServer)
+// .setDocumentLoader(new CollectingDocumentLoader(100))
+ .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName()))
+ .setMetricRegistry(new MetricRegistry())
+ .build();
+ }
+
+ private Config parse(String file) throws IOException {
+ SolrLocator locator = new SolrLocator(createMorphlineContext());
+ locator.setSolrHomeDir(testSolrHome + "/collection1");
+ Config config = new Compiler().parse(new File(tempDir + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR"));
+ config = config.getConfigList("morphlines").get(0);
+ return config;
+ }
+
+ protected void startSession() {
+ Notifications.notifyStartSession(morphline);
+ }
+
+  /** Asserts that every expected field value appears in some document returned by a match-all query. */
+  protected void testDocumentContent(HashMap expectedResultMap)
+  throws Exception {
+    QueryResponse rsp = solrServer.query(new SolrQuery("*:*").setRows(Integer.MAX_VALUE));
+    // Check that every expected field value shows up in the actual query result
+    for (Entry current : expectedResultMap.entrySet()) {
+      String field = current.getKey();
+      ExpectedResult.CompareType compareType = current.getValue().getCompareType();
+      for (String expectedFieldValue : current.getValue().getFieldValues()) {
+        boolean foundField = false;
+        for (SolrDocument doc : rsp.getResults()) {
+          Collection actualFieldValues = doc.getFieldValues(field);
+          if (actualFieldValues == null) {
+            continue; // this document does not have the field at all; try the next one
+          }
+          if (compareType == ExpectedResult.CompareType.equals) {
+            foundField = actualFieldValues.contains(expectedFieldValue);
+          } else {
+            for (Iterator it = actualFieldValues.iterator(); it.hasNext() && !foundField; ) {
+              String actualValue = it.next().toString();  // test only supports string comparison
+              foundField = actualValue.contains(expectedFieldValue);
+            }
+          }
+          if (foundField) {
+            break; // stop scanning documents once the expected value has been seen
+          }
+        }
+        // JUnit assertion rather than the 'assert' keyword, so the check also runs without -ea
+        assertTrue("expected value '" + expectedFieldValue + "' not found for field '" + field + "'", foundField);
+      }
+    }
+  }
+
+ /**
+ * Representation of the expected output of a SolrQuery.
+ */
+ protected static class ExpectedResult {
+ private HashSet fieldValues;
+ public enum CompareType {
+ equals, // Compare with equals, i.e. actual.equals(expected)
+ contains; // Compare with contains, i.e. actual.contains(expected)
+ }
+ private CompareType compareType;
+
+ public ExpectedResult(HashSet fieldValues, CompareType compareType) {
+ this.fieldValues = fieldValues;
+ this.compareType = compareType;
+ }
+ public HashSet getFieldValues() { return fieldValues; }
+ public CompareType getCompareType() { return compareType; }
+ }
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java
new file mode 100644
index 00000000000..62cf325d5a7
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.morphlines.solr;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
+import org.apache.solr.cloud.AbstractZkTestCase;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.util.ExternalPaths;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.Collector;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Compiler;
+import com.cloudera.cdk.morphline.base.FaultTolerance;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.cloudera.cdk.morphline.stdlib.PipeBuilder;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.collect.ListMultimap;
+import com.typesafe.config.Config;
+
+public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistribZkTestBase {
+ private static final File solrHomeDirectory = new File(TEMP_DIR, AbstractSolrMorphlineZkTestBase.class.getName());
+
+ protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files";
+ private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr");
+ private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1");
+
+ protected Collector collector;
+ protected Command morphline;
+
+ @Override
+ public String getSolrHome() {
+ return solrHomeDirectory.getPath();
+ }
+
+ public AbstractSolrMorphlineZkTestBase() {
+ fixShardCount = true;
+ sliceCount = 3;
+ shardCount = 3;
+ }
+
+ @BeforeClass
+ public static void setupClass() throws Exception {
+ AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
+ FileUtils.copyDirectory(SOLR_INSTANCE_DIR, solrHomeDirectory);
+ createTempDir();
+ }
+
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ System.setProperty("host", "127.0.0.1");
+ System.setProperty("numShards", Integer.toString(sliceCount));
+ uploadConfFiles();
+ collector = new Collector();
+ }
+
+ @Override
+ @After
+ public void tearDown() throws Exception {
+ super.tearDown();
+ System.clearProperty("host");
+ System.clearProperty("numShards");
+ }
+
+ @Test
+ @Override
+ public void testDistribSearch() throws Exception {
+ super.testDistribSearch();
+ }
+
+ @Override
+ protected void commit() throws Exception {
+ Notifications.notifyCommitTransaction(morphline);
+ super.commit();
+ }
+
+ protected Command parse(String file) throws IOException {
+ return parse(file, "collection1");
+ }
+
+ protected Command parse(String file, String collection) throws IOException {
+ SolrLocator locator = new SolrLocator(createMorphlineContext());
+ locator.setCollectionName(collection);
+ locator.setZkHost(zkServer.getZkAddress());
+ //locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented
+ //locator.setSolrHomeDir(SOLR_HOME_DIR.getPath());
+ Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR"));
+ config = config.getConfigList("morphlines").get(0);
+ return createMorphline(config);
+ }
+
+ private Command createMorphline(Config config) {
+ return new PipeBuilder().build(config, null, collector, createMorphlineContext());
+ }
+
+ private MorphlineContext createMorphlineContext() {
+ return new MorphlineContext.Builder()
+ .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName()))
+ .setMetricRegistry(new MetricRegistry())
+ .build();
+ }
+
+ protected void startSession() {
+ Notifications.notifyStartSession(morphline);
+ }
+
+ protected ListMultimap next(Iterator iter) {
+ SolrDocument doc = iter.next();
+ Record record = toRecord(doc);
+ record.removeAll("_version_"); // the values of this field are unknown and internal to solr
+ return record.getFields();
+ }
+
+ private Record toRecord(SolrDocument doc) {
+ Record record = new Record();
+ for (String key : doc.keySet()) {
+ record.getFields().replaceValues(key, doc.getFieldValues(key));
+ }
+ return record;
+ }
+
+ @Override
+ public JettySolrRunner createJetty(File solrHome, String dataDir,
+ String shardList, String solrConfigOverride, String schemaOverride)
+ throws Exception {
+
+ JettySolrRunner jetty = new JettySolrRunner(solrHome.getAbsolutePath(),
+ context, 0, solrConfigOverride, schemaOverride);
+
+ jetty.setShards(shardList);
+
+ if (System.getProperty("collection") == null) {
+ System.setProperty("collection", "collection1");
+ }
+
+ jetty.start();
+
+ System.clearProperty("collection");
+
+ return jetty;
+ }
+
+ private static void putConfig(SolrZkClient zkClient, File solrhome, String name) throws Exception {
+ putConfig(zkClient, solrhome, name, name);
+ }
+
+ private static void putConfig(SolrZkClient zkClient, File solrhome, String srcName, String destName)
+ throws Exception {
+
+ File file = new File(solrhome, "conf" + File.separator + srcName);
+ if (!file.exists()) {
+ // LOG.info("skipping " + file.getAbsolutePath() +
+ // " because it doesn't exist");
+ return;
+ }
+
+ String destPath = "/configs/conf1/" + destName;
+ // LOG.info("put " + file.getAbsolutePath() + " to " + destPath);
+ zkClient.makePath(destPath, file, false, true);
+ }
+
+  private void uploadConfFiles() throws Exception {
+    // upload our own config files; the ZooKeeper client is closed even if an upload fails
+    SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000);
+    try {
+      putConfig(zkClient, SOLR_CONF_DIR, "solrconfig.xml");
+      putConfig(zkClient, SOLR_CONF_DIR, "schema.xml");
+      putConfig(zkClient, SOLR_CONF_DIR, "elevate.xml");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_en.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ar.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_bg.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ca.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_cz.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_da.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_el.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_es.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_eu.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_de.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fa.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fi.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fr.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ga.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_gl.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hi.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hu.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hy.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_id.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_it.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ja.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_lv.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_nl.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_no.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_pt.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ro.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ru.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_sv.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_th.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_tr.txt");
+
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ca.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_fr.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ga.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_it.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/stemdict_nl.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "lang/hyphenations_ga.txt");
+
+      putConfig(zkClient, SOLR_CONF_DIR, "stopwords.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "protwords.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "currency.xml");
+      putConfig(zkClient, SOLR_CONF_DIR, "open-exchange-rates.json");
+      putConfig(zkClient, SOLR_CONF_DIR, "mapping-ISOLatin1Accent.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "old_synonyms.txt");
+      putConfig(zkClient, SOLR_CONF_DIR, "synonyms.txt");
+    } finally {
+      zkClient.close();
+    }
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java
new file mode 100644
index 00000000000..ed58cffff6e
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.client.solrj.response.SolrPingResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test double for {@link DocumentLoader}: nothing is sent to Solr; loaded documents are
+ * buffered in memory and moved into an in-memory result list whenever the buffer reaches
+ * the configured batch size or the transaction is committed.
+ */
+class CollectingDocumentLoader implements DocumentLoader {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(CollectingDocumentLoader.class);
+
+  /** Number of buffered documents that triggers a flush. */
+  private final int batchSize;
+
+  /** Documents loaded since the last flush. */
+  private final List batch = new ArrayList();
+
+  /** Every document flushed so far, in the order it was flushed. */
+  private List results = new ArrayList();
+
+  /**
+   * @param batchSize number of documents to buffer before flushing; must be positive
+   * @throws IllegalArgumentException if {@code batchSize} is zero or negative
+   */
+  public CollectingDocumentLoader(int batchSize) {
+    if (batchSize < 1) {
+      throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize);
+    }
+    this.batchSize = batchSize;
+  }
+
+  /** Starts a new transaction by discarding whatever is still buffered. */
+  @Override
+  public void beginTransaction() {
+    LOGGER.trace("beginTransaction");
+    batch.clear();
+  }
+
+  /** Buffers the document and flushes as soon as the buffer holds {@code batchSize} documents. */
+  @Override
+  public void load(SolrInputDocument doc) {
+    LOGGER.trace("load doc: {}", doc);
+    batch.add(doc);
+    if (batch.size() < batchSize) {
+      return; // buffer not full yet
+    }
+    loadBatch();
+  }
+
+  /** Flushes any documents that are still buffered. */
+  @Override
+  public void commitTransaction() {
+    LOGGER.trace("commitTransaction");
+    if (!batch.isEmpty()) {
+      loadBatch();
+    }
+  }
+
+  /** Appends the buffered documents to the results; the buffer is emptied even if that fails. */
+  private void loadBatch() {
+    try {
+      results.addAll(batch);
+    } finally {
+      batch.clear();
+    }
+  }
+
+  /** Only logs the call and returns an empty response; buffered documents are left untouched. */
+  @Override
+  public UpdateResponse rollbackTransaction() {
+    LOGGER.trace("rollback");
+    return new UpdateResponse();
+  }
+
+  /** Only logs the call. */
+  @Override
+  public void shutdown() {
+    LOGGER.trace("shutdown");
+  }
+
+  /** Only logs the call and returns an empty ping response. */
+  @Override
+  public SolrPingResponse ping() {
+    LOGGER.trace("ping");
+    return new SolrPingResponse();
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java
new file mode 100644
index 00000000000..1f747f3d2d8
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.core.CoreContainer;
+
+/**
+ * An {@link EmbeddedSolrServer} for tests that suppresses shutdown and rollback requests:
+ * both calls are accepted but never forwarded to the superclass.
+ */
+public class EmbeddedTestSolrServer extends EmbeddedSolrServer {
+
+  public EmbeddedTestSolrServer(CoreContainer coreContainer, String coreName) {
+    super(coreContainer, coreName);
+  }
+
+  /** Intentionally empty: the superclass shutdown is not invoked. */
+  @Override
+  public void shutdown() {
+    // NOP
+  }
+
+  /** Skips the superclass rollback and hands back a fresh, empty {@link UpdateResponse}. */
+  @Override
+  public UpdateResponse rollback() throws SolrServerException, IOException {
+    final UpdateResponse emptyResponse = new UpdateResponse();
+    return emptyResponse;
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java
new file mode 100644
index 00000000000..126eef34979
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Tests for Solr-related morphline commands. Relies on members that are not declared in this
+ * class (morphline, collector, createMorphline, startSession, queryResultSetSize); presumably
+ * they are provided by AbstractSolrMorphlineTestBase.
+ */
+public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase {
+
+ /**
+ * Sends one record through the "test-morphlines/loadSolrBasic" morphline and checks that the
+ * collector receives a record holding only the id field and that the query "*:*" reports a
+ * result set of size one.
+ */
+ @Test
+ public void testLoadSolrBasic() throws Exception {
+ //System.setProperty("ENV_SOLR_HOME", testSolrHome + "/collection1");
+ morphline = createMorphline("test-morphlines/loadSolrBasic");
+ //System.clearProperty("ENV_SOLR_HOME");
+ Record record = new Record();
+ record.put(Fields.ID, "id0");
+ record.put("first_name", "Nadja"); // will be sanitized
+ startSession();
+ Notifications.notifyBeginTransaction(morphline);
+ assertTrue(morphline.process(record));
+ assertEquals(1, collector.getNumStartEvents());
+ Notifications.notifyCommitTransaction(morphline);
+ // the expected record carries no first_name: only the id is supposed to survive
+ Record expected = new Record();
+ expected.put(Fields.ID, "id0");
+ assertEquals(Arrays.asList(expected), collector.getRecords());
+ assertEquals(1, queryResultSetSize("*:*"));
+ Notifications.notifyRollbackTransaction(morphline);
+ Notifications.notifyShutdown(morphline);
+ }
+
+ /**
+ * Sends a record with two message values through the "test-morphlines/tokenizeText" morphline
+ * and expects the output to be the input plus a "tokens" field with the listed tokens.
+ */
+ @Test
+ public void testTokenizeText() throws Exception {
+ morphline = createMorphline("test-morphlines/tokenizeText");
+ Record record = new Record();
+ record.put(Fields.MESSAGE, "Hello World!");
+ record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
+ // expected output = copy of the input plus the tokens derived from both message values
+ Record expected = record.copy();
+ expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
+ startSession();
+ Notifications.notifyBeginTransaction(morphline);
+ assertTrue(morphline.process(record));
+ assertEquals(1, collector.getNumStartEvents());
+ Notifications.notifyCommitTransaction(morphline);
+ assertEquals(expected, collector.getFirstRecord());
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
new file mode 100644
index 00000000000..2fce297b34d
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.params.CollectionParams.CollectionAction;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.NamedList;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Verifies that the "test-morphlines/loadSolrBasic" morphline can index through a collection
+ * alias, and that parsing the morphline fails with an IllegalArgumentException once the alias
+ * maps to more than one collection.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase {
+
+ /**
+ * Indexes two records through the alias "aliascollection" (pointing at collection1), checks the
+ * collector output and the Solr query result, then re-points the alias at two collections and
+ * expects parse() to reject it.
+ */
+ @Override
+ public void doTest() throws Exception {
+
+ waitForRecoveriesToFinish(false);
+
+ // alias maps to exactly one collection
+ createAlias("aliascollection", "collection1");
+
+ morphline = parse("test-morphlines/loadSolrBasic", "aliascollection");
+ Record record = new Record();
+ record.put(Fields.ID, "id0-innsbruck");
+ record.put("text", "mytext");
+ record.put("user_screen_name", "foo");
+ record.put("first_name", "Nadja"); // will be sanitized
+ startSession();
+ assertEquals(1, collector.getNumStartEvents());
+ Notifications.notifyBeginTransaction(morphline);
+ assertTrue(morphline.process(record));
+
+ record = new Record();
+ record.put(Fields.ID, "id1-innsbruck");
+ record.put("text", "mytext1");
+ record.put("user_screen_name", "foo1");
+ record.put("first_name", "Nadja1"); // will be sanitized
+ assertTrue(morphline.process(record));
+
+ // expected records omit first_name
+ Record expected = new Record();
+ expected.put(Fields.ID, "id0-innsbruck");
+ expected.put("text", "mytext");
+ expected.put("user_screen_name", "foo");
+ Iterator citer = collector.getRecords().iterator();
+ assertEquals(expected, citer.next());
+
+ Record expected2 = new Record();
+ expected2.put(Fields.ID, "id1-innsbruck");
+ expected2.put("text", "mytext1");
+ expected2.put("user_screen_name", "foo1");
+ assertEquals(expected2, citer.next());
+
+ assertFalse(citer.hasNext());
+
+ commit();
+
+ // query everything back, sorted by id so the order matches expected/expected2
+ QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+ //System.out.println(rsp);
+ Iterator iter = rsp.getResults().iterator();
+ assertEquals(expected.getFields(), next(iter));
+ assertEquals(expected2.getFields(), next(iter));
+ assertFalse(iter.hasNext());
+
+ Notifications.notifyRollbackTransaction(morphline);
+ Notifications.notifyShutdown(morphline);
+
+
+ // re-point the alias at two collections; parsing the morphline must now fail
+ createAlias("aliascollection", "collection1,collection2");
+
+ try {
+ parse("test-morphlines/loadSolrBasic", "aliascollection");
+ fail("Expected IAE because update alias maps to multiple collections");
+ } catch (IllegalArgumentException e) {
+ // expected: this is the failure the test is looking for
+ }
+
+ cloudClient.shutdown();
+ }
+
+ /**
+ * Sends a CREATEALIAS request to the "/admin/collections" path through cloudClient.
+ *
+ * @param alias value of the "name" request parameter
+ * @param collections value of the "collections" request parameter
+ * @return whatever cloudClient.request returns for the request
+ */
+ private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set("collections", collections);
+ params.set("name", alias);
+ params.set("action", CollectionAction.CREATEALIAS.toString());
+ QueryRequest request = new QueryRequest(params);
+ request.setPath("/admin/collections");
+ return cloudClient.request(request);
+ }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
new file mode 100644
index 00000000000..4e082cc260f
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.avro.Schema.Field;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.FileReader;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.google.common.base.Preconditions;
+import com.google.common.io.Files;
+
+/**
+ * Loads an Avro container file through the "test-morphlines/tutorialReadAvroContainer"
+ * morphline and verifies that both the morphline output and the documents returned by Solr
+ * match the records read directly from the Avro file.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase {
+
+  @Override
+  public void doTest() throws Exception {
+    File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
+
+    waitForRecoveriesToFinish(false);
+
+    // load avro records via morphline and zk into solr
+    morphline = parse("test-morphlines/tutorialReadAvroContainer");
+    Record record = new Record();
+    byte[] body = Files.toByteArray(file);
+    record.put(Fields.ATTACHMENT_BODY, body);
+    startSession();
+    Notifications.notifyBeginTransaction(morphline);
+    assertTrue(morphline.process(record));
+    assertEquals(1, collector.getNumStartEvents());
+
+    commit();
+
+    // fetch sorted result set from solr
+    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));
+    assertEquals(2104, collector.getRecords().size());
+    assertEquals(collector.getRecords().size(), rsp.getResults().size());
+
+    // bring the morphline output into the same (string-sorted by id) order as the solr result
+    Collections.sort(collector.getRecords(), new Comparator<Record>() {
+      @Override
+      public int compare(Record r1, Record r2) {
+        return r1.get("id").toString().compareTo(r2.get("id").toString());
+      }
+    });
+
+    // fetch test input data and sort like solr result set
+    List<GenericData.Record> records = new ArrayList<GenericData.Record>();
+    FileReader<GenericData.Record> reader =
+        new DataFileReader<GenericData.Record>(file, new GenericDatumReader<GenericData.Record>());
+    try {
+      while (reader.hasNext()) {
+        records.add(reader.next());
+      }
+    } finally {
+      reader.close(); // release the file handle even if reading fails
+    }
+    assertEquals(collector.getRecords().size(), records.size());
+    Collections.sort(records, new Comparator<GenericData.Record>() {
+      @Override
+      public int compare(GenericData.Record r1, GenericData.Record r2) {
+        return r1.get("id").toString().compareTo(r2.get("id").toString());
+      }
+    });
+
+    // The records are sorted by id, so duplicate ids would be adjacent. Compare the id field
+    // itself (not the whole record), otherwise two different records sharing an id slip through.
+    Object lastId = null;
+    for (int i = 0; i < records.size(); i++) {
+      //System.out.println("myrec" + i + ":" + records.get(i));
+      Object id = records.get(i).get("id");
+      if (id != null && id.equals(lastId)) {
+        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");
+      }
+      lastId = id;
+    }
+
+    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
+    for (int i = 0; i < records.size(); i++) {
+      // verify morphline spat out expected data
+      Record actual = collector.getRecords().get(i);
+      GenericData.Record expected = records.get(i);
+      Preconditions.checkNotNull(expected);
+      assertTweetEquals(expected, actual, i);
+
+      // verify Solr result set contains expected data
+      actual = new Record();
+      actual.getFields().putAll(next(rspIter));
+      assertTweetEquals(expected, actual, i);
+    }
+
+    Notifications.notifyRollbackTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+    cloudClient.shutdown();
+  }
+
+  /**
+   * Asserts that a fixed set of fields has the same string value in the Avro input record and
+   * in the given morphline/Solr record.
+   *
+   * @param expected record read directly from the Avro file
+   * @param actual record produced by the morphline or rebuilt from a Solr document
+   * @param i position of the record, used only in the assertion message
+   */
+  private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
+    Preconditions.checkNotNull(expected);
+    Preconditions.checkNotNull(actual);
+//    System.out.println("\n\nexpected: " + toString(expected));
+//    System.out.println("actual:   " + actual);
+    String[] fieldNames = new String[] {
+        "id",
+        "in_reply_to_status_id",
+        "in_reply_to_user_id",
+        "retweet_count",
+        "text",
+        };
+    for (String fieldName : fieldNames) {
+      assertEquals(
+          i + " fieldName: " + fieldName,
+          expected.get(fieldName).toString(),
+          actual.getFirstValue(fieldName).toString());
+    }
+  }
+
+  /** Debugging aid (referenced only from the commented-out prints): renders an Avro record. */
+  private String toString(GenericData.Record avroRecord) {
+    Record record = new Record();
+    for (Field field : avroRecord.getSchema().getFields()) {
+      record.put(field.name(), avroRecord.get(field.pos()));
+    }
+    return record.toString(); // prints sorted by key for human readability
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
new file mode 100644
index 00000000000..0537c2e23ab
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import java.util.Iterator;
+
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Notifications;
+
+/**
+ * Indexes two records through the "test-morphlines/loadSolrBasic" morphline and verifies both
+ * the records seen by the collector and the documents returned by a Solr query.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@SuppressCodecs({"Lucene3x", "Lucene40"})
+@Slow
+public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase {
+
+ /**
+ * Processes two records, checks that the collector received them without the first_name field,
+ * commits, and checks that the query "*:*" sorted by id returns exactly those two documents.
+ */
+ @Override
+ public void doTest() throws Exception {
+
+ waitForRecoveriesToFinish(false);
+
+ morphline = parse("test-morphlines/loadSolrBasic");
+ Record record = new Record();
+ record.put(Fields.ID, "id0-innsbruck");
+ record.put("text", "mytext");
+ record.put("user_screen_name", "foo");
+ record.put("first_name", "Nadja"); // will be sanitized
+ startSession();
+ assertEquals(1, collector.getNumStartEvents());
+ Notifications.notifyBeginTransaction(morphline);
+ assertTrue(morphline.process(record));
+
+ record = new Record();
+ record.put(Fields.ID, "id1-innsbruck");
+ record.put("text", "mytext1");
+ record.put("user_screen_name", "foo1");
+ record.put("first_name", "Nadja1"); // will be sanitized
+ assertTrue(morphline.process(record));
+
+ // expected records omit first_name
+ Record expected = new Record();
+ expected.put(Fields.ID, "id0-innsbruck");
+ expected.put("text", "mytext");
+ expected.put("user_screen_name", "foo");
+ Iterator citer = collector.getRecords().iterator();
+ assertEquals(expected, citer.next());
+
+ Record expected2 = new Record();
+ expected2.put(Fields.ID, "id1-innsbruck");
+ expected2.put("text", "mytext1");
+ expected2.put("user_screen_name", "foo1");
+ assertEquals(expected2, citer.next());
+
+ assertFalse(citer.hasNext());
+
+ commit();
+
+ // query everything back, sorted by id so the order matches expected/expected2
+ QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
+ //System.out.println(rsp);
+ Iterator iter = rsp.getResults().iterator();
+ assertEquals(expected.getFields(), next(iter));
+ assertEquals(expected2.getFields(), next(iter));
+ assertFalse(iter.hasNext());
+
+ Notifications.notifyRollbackTransaction(morphline);
+ Notifications.notifyShutdown(morphline);
+ cloudClient.shutdown();
+ }
+
+}
diff --git a/solr/contrib/solr-mr/build.xml b/solr/contrib/solr-mr/build.xml
new file mode 100644
index 00000000000..d9f1f72a26b
--- /dev/null
+++ b/solr/contrib/solr-mr/build.xml
@@ -0,0 +1,147 @@
+
+
+
+
+
+
+
+ Solr map-reduce index construction.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/ivy.xml b/solr/contrib/solr-mr/ivy.xml
new file mode 100644
index 00000000000..d51fd3b020e
--- /dev/null
+++ b/solr/contrib/solr-mr/ivy.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml b/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml
new file mode 100644
index 00000000000..1640b6ff72e
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml
@@ -0,0 +1,39 @@
+
+
+
+
+
+ job
+
+ jar
+
+ false
+
+
+ false
+ runtime
+ lib
+
+ ${groupId}:${artifactId}
+
+
+
+ true
+
+ ${groupId}:${artifactId}
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java
new file mode 100644
index 00000000000..6b650b6cc7b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Enables adding batches of documents to an EmbeddedSolrServer.
+ *
+ * Batches are either written synchronously by the calling thread (when constructed with zero
+ * writer threads) or handed to a bounded thread pool. A failure in a pooled batch is remembered
+ * in {@link #batchWriteException} and rethrown by the next {@link #queueBatch(Collection)} call
+ * or by {@link #close(TaskAttemptContext)}.
+ */
+class BatchWriter {
+
+  private final EmbeddedSolrServer solr;
+
+  /** Most recent failure recorded by {@link #runUpdate(List)}; null when there is none pending. */
+  private volatile Exception batchWriteException = null;
+
+  private static final Logger LOG = LoggerFactory.getLogger(BatchWriter.class);
+
+  public Exception getBatchWriteException() {
+    return batchWriteException;
+  }
+
+  public void setBatchWriteException(Exception batchWriteException) {
+    this.batchWriteException = batchWriteException;
+  }
+
+  /** The number of writing threads. */
+  final int writerThreads;
+
+  /** Queue Size */
+  final int queueSize;
+
+  /** Pool executing the batches; null in the single threaded case. */
+  private final ThreadPoolExecutor batchPool;
+
+  /** Task the Hadoop counters are reported against. */
+  private final TaskID taskId;
+
+  /**
+   * The number of batches currently inside {@link Batch#run()}. Note that
+   * {@link #close(TaskAttemptContext)} does not consult this counter; it waits for the pool to
+   * terminate instead.
+   */
+  AtomicInteger executingBatches = new AtomicInteger(0);
+
+  /**
+   * One unit of work: a private copy of the documents of a batch plus the response obtained
+   * when the batch was written.
+   */
+  final class Batch implements Runnable {
+
+    private List documents;
+    private UpdateResponse result;
+
+    /** Copies the given documents so later changes to the caller's collection are not seen. */
+    public Batch(Collection batch) {
+      documents = new ArrayList(batch);
+    }
+
+    /** Writes the documents via {@link BatchWriter#runUpdate(List)} and stores the response. */
+    public void run() {
+      // count the batch before entering the try block so the finally block only ever
+      // undoes an increment that actually happened
+      executingBatches.getAndIncrement();
+      try {
+        result = runUpdate(documents);
+      } finally {
+        executingBatches.getAndDecrement();
+      }
+    }
+
+    protected List getDocuments() {
+      return documents;
+    }
+
+    protected void setDocuments(List documents) {
+      this.documents = documents;
+    }
+
+    protected UpdateResponse getResult() {
+      return result;
+    }
+
+    protected void setResult(UpdateResponse result) {
+      this.result = result;
+    }
+
+    /** Replaces the content of this batch with the given documents and forgets the last result. */
+    protected void reset(List documents) {
+      if (this.documents == null) {
+        this.documents = new ArrayList(documents);
+      } else {
+        this.documents.clear();
+        this.documents.addAll(documents);
+      }
+      result = null;
+    }
+
+    /** Replaces the content of this batch with a single document and forgets the last result. */
+    protected void reset(SolrInputDocument document) {
+      if (this.documents == null) {
+        this.documents = new ArrayList();
+      } else {
+        this.documents.clear();
+      }
+      this.documents.add(document);
+      result = null;
+    }
+  }
+
+  /**
+   * Adds the documents to Solr and updates the Hadoop counters.
+   *
+   * Never throws: any Throwable is logged, counted, and recorded via
+   * {@link #setBatchWriteException(Exception)} (non-Exception throwables are wrapped).
+   *
+   * @param batchToWrite the documents to add
+   * @return the Solr response, or null if the write failed
+   */
+  protected UpdateResponse runUpdate(List batchToWrite) {
+    try {
+      UpdateResponse result = solr.add(batchToWrite);
+      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1);
+      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size());
+      if (LOG.isDebugEnabled()) {
+        SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime());
+      }
+      return result;
+    } catch (Throwable e) {
+      if (e instanceof Exception) {
+        setBatchWriteException((Exception) e);
+      } else {
+        setBatchWriteException(new Exception(e));
+      }
+      SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1);
+      LOG.error("Unable to process batch", e);
+      return null;
+    }
+  }
+
+
+  /**
+   * Creates the batch writer and, unless {@code writerThreads} is zero, the thread pool that
+   * executes the batches.
+   *
+   * @param solr the server the documents are added to
+   * @param batchSize not used by this class
+   * @param tid task the counters are reported against
+   * @param writerThreads number of pool threads; zero selects the single threaded mode
+   * @param queueSize capacity of the pool's work queue
+   */
+  public BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid,
+      int writerThreads, int queueSize) {
+    this.solr = solr;
+    this.writerThreads = writerThreads;
+    this.queueSize = queueSize;
+    taskId = tid;
+
+    // we need to obtain the settings before the constructor
+    if (writerThreads != 0) {
+      // CallerRunsPolicy: when the bounded queue is full the submitting thread executes the
+      // batch itself, which throttles the producer instead of rejecting work
+      batchPool = new ThreadPoolExecutor(writerThreads, writerThreads, 5,
+          TimeUnit.SECONDS, new LinkedBlockingQueue(queueSize),
+          new ThreadPoolExecutor.CallerRunsPolicy());
+    } else { // single threaded case
+      batchPool = null;
+    }
+  }
+
+  /**
+   * Submits a batch for writing. A failure recorded by an earlier batch is thrown first; in the
+   * single threaded case a failure of this very batch is thrown as well.
+   *
+   * @param batch the documents to write; copied before the method returns
+   * @throws IOException On low level IO error
+   * @throws SolrServerException On Solr Exception
+   */
+  public void queueBatch(Collection batch)
+      throws IOException, SolrServerException {
+
+    throwIf();
+    Batch b = new Batch(batch);
+    if (batchPool != null) {
+      batchPool.execute(b);
+    } else { // single threaded case
+      b.run();
+      throwIf();
+    }
+  }
+
+  /**
+   * Waits for all queued batches, surfaces any failure they recorded, optimizes the index and
+   * shuts the Solr server down. The server is shut down even when one of the earlier steps
+   * throws.
+   *
+   * @param context used for status updates, configuration and counters
+   * @throws InterruptedException if interrupted while waiting for the pool to terminate
+   * @throws SolrServerException On Solr Exception, including one recorded by a batch
+   * @throws IOException On low level IO error, including one recorded by a batch
+   */
+  public synchronized void close(TaskAttemptContext context)
+      throws InterruptedException, SolrServerException, IOException {
+
+    try {
+      if (batchPool != null) {
+        context.setStatus("Waiting for batches to complete");
+        batchPool.shutdown();
+
+        while (!batchPool.isTerminated()) {
+          LOG.info(String.format(Locale.ENGLISH,
+              "Waiting for %d items and %d threads to finish executing", batchPool
+                  .getQueue().size(), batchPool.getActiveCount()));
+          batchPool.awaitTermination(5, TimeUnit.SECONDS);
+        }
+      }
+      // Batches that finished after the last queueBatch() call may have recorded a failure
+      // nobody has looked at yet; report it instead of silently producing an incomplete index.
+      throwIf();
+
+      //reporter.setStatus("Committing Solr");
+      //solr.commit(true, false);
+      context.setStatus("Optimizing Solr");
+      int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
+      LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
+      long start = System.currentTimeMillis();
+      solr.optimize(true, false, maxSegments);
+      context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start);
+      float secs = (System.currentTimeMillis() - start) / 1000.0f;
+      LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
+    } finally {
+      context.setStatus("Shutting down Solr");
+      // TODO is core close needed? - according to TestEmbeddedSolrServer it's not...
+      //core.close();
+      solr.shutdown();
+    }
+  }
+
+  /**
+   * Throw a legal exception if a previous batch write had an exception. The
+   * previous state is cleared. Uses {@link #batchWriteException} for the state
+   * from the last exception.
+   *
+   * This will lose individual exceptions if the exceptions happen rapidly: only the most
+   * recently recorded one is kept, and the read-then-clear below is not atomic.
+   *
+   * @throws IOException On low level IO error
+   * @throws SolrServerException On Solr Exception
+   */
+  private void throwIf() throws IOException, SolrServerException {
+
+    final Exception last = batchWriteException;
+    batchWriteException = null;
+
+    if (last == null) {
+      return;
+    }
+    if (last instanceof SolrServerException) {
+      throw (SolrServerException) last;
+    }
+    if (last instanceof IOException) {
+      throw (IOException) last;
+    }
+    throw new IOException("Batch Write Failure", last);
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java
new file mode 100644
index 00000000000..33f609f1f2d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.hadoop;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An InputStream that wraps a DataInput.
+ * @see DataOutputOutputStream
+ */
+@InterfaceAudience.Private
+public class DataInputInputStream extends InputStream {
+
+  private final DataInput in;
+
+  /**
+   * Construct an InputStream from the given DataInput. If 'in'
+   * is already an InputStream, simply returns it. Otherwise, wraps
+   * it in an InputStream.
+   * @param in the DataInput to wrap
+   * @return an InputStream instance that reads from 'in'
+   */
+  public static InputStream constructInputStream(DataInput in) {
+    return (in instanceof InputStream) ? (InputStream) in : new DataInputInputStream(in);
+  }
+
+  public DataInputInputStream(DataInput in) {
+    this.in = in;
+  }
+
+  /** Reads one byte; end of input is reported as -1, as the InputStream contract requires. */
+  @Override
+  public int read() throws IOException {
+    try {
+      return in.readUnsignedByte();
+    } catch (java.io.EOFException eof) {
+      return -1; // DataInput signals end of input with EOFException, InputStream with -1
+    }
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
new file mode 100644
index 00000000000..389c52a577d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Adapter that exposes a {@link DataOutput} through the {@link OutputStream} API.
+ */
+@InterfaceAudience.Private
+public class DataOutputOutputStream extends OutputStream {
+
+  private final DataOutput sink;
+
+  /**
+   * Returns an OutputStream view of the given DataOutput. An argument that
+   * already is an OutputStream is handed back unchanged; anything else is
+   * wrapped in a new DataOutputOutputStream.
+   *
+   * @param out the DataOutput to expose
+   * @return an OutputStream whose writes end up in 'out'
+   */
+  public static OutputStream constructOutputStream(DataOutput out) {
+    if (!(out instanceof OutputStream)) {
+      return new DataOutputOutputStream(out);
+    }
+    return (OutputStream) out;
+  }
+
+  private DataOutputOutputStream(DataOutput target) {
+    this.sink = target;
+  }
+
+  @Override
+  public void write(int b) throws IOException {
+    sink.writeByte(b);
+  }
+
+  @Override
+  public void write(byte[] b, int off, int len) throws IOException {
+    sink.write(b, off, len);
+  }
+
+  @Override
+  public void write(byte[] b) throws IOException {
+    sink.write(b);
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
new file mode 100644
index 00000000000..bacf1d0e1fc
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import org.apache.solr.client.solrj.response.SolrPingResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.morphlines.solr.DocumentLoader;
+
+/**
+ * Prints documents to stdout instead of loading them into Solr for quicker turnaround during early
+ * trial & debug sessions. All other operations do nothing or return a newly created response object.
+ */
+final class DryRunDocumentLoader implements DocumentLoader {
+
+  @Override
+  public void beginTransaction() { // nothing to begin: no Solr server is involved
+  }
+
+  @Override
+  public void load(SolrInputDocument doc) { // the only operation with a visible effect
+    System.out.println("dryrun: " + doc);
+  }
+
+  @Override
+  public void commitTransaction() { // nothing to commit: documents were only printed
+  }
+
+  @Override
+  public UpdateResponse rollbackTransaction() { // nothing to roll back
+    return new UpdateResponse();
+  }
+
+  @Override
+  public void shutdown() { // no resources are held
+  }
+
+  @Override
+  public SolrPingResponse ping() { // no server is contacted
+    return new SolrPingResponse();
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java
new file mode 100644
index 00000000000..a7e4f7dda9d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CompletionService;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrServer;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.request.CoreAdminRequest;
+import org.apache.solr.hadoop.MapReduceIndexerTool.Options;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The optional (parallel) GoLive phase merges the output shards of the previous
+ * phase into a set of live customer facing Solr servers, typically a SolrCloud.
+ */
+class GoLive {
+
+  private static final Logger LOG = LoggerFactory.getLogger(GoLive.class);
+
+  // TODO: handle clusters with replicas
+  /**
+   * Merges each job output directory into the live Solr core(s) listed at the same
+   * position in {@code options.shardUrls}, then commits the merged result.
+   *
+   * @param options tool options; goLiveThreads, shardUrls, zkHost and collection are read
+   * @param outDirs output directories, matched by position with {@code options.shardUrls}
+   * @return true if every merge request and the final commit succeeded, false otherwise
+   */
+  public boolean goLive(Options options, FileStatus[] outDirs) {
+    LOG.info("Live merging of output shards into Solr cluster...");
+    boolean success = false;
+    long start = System.currentTimeMillis();
+    int concurrentMerges = options.goLiveThreads;
+    ThreadPoolExecutor executor = new ThreadPoolExecutor(concurrentMerges,
+        concurrentMerges, 1, TimeUnit.SECONDS,
+        new LinkedBlockingQueue<Runnable>());
+
+    try {
+      CompletionService<Request> completionService = new ExecutorCompletionService<Request>(executor);
+      Set<Future<Request>> pending = new HashSet<Future<Request>>();
+      int cnt = -1;
+      for (final FileStatus dir : outDirs) {
+        LOG.debug("processing: " + dir.getPath());
+
+        // the n-th output directory belongs to the n-th entry of options.shardUrls
+        cnt++;
+        List<String> urls = options.shardUrls.get(cnt);
+        for (String url : urls) {
+          String baseUrl = url;
+          if (baseUrl.endsWith("/")) {
+            baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
+          }
+
+          // split the URL at its last '/' into the merge target URL and the core name
+          int lastPathIndex = baseUrl.lastIndexOf("/");
+          if (lastPathIndex == -1) {
+            LOG.error("Found unexpected shardurl, live merge failed: " + baseUrl);
+            return false;
+          }
+          final String name = baseUrl.substring(lastPathIndex + 1);
+          baseUrl = baseUrl.substring(0, lastPathIndex);
+          final String mergeUrl = baseUrl;
+
+          Callable<Request> task = new Callable<Request>() {
+            @Override
+            public Request call() {
+              Request req = new Request();
+              LOG.info("Live merge " + dir.getPath() + " into " + mergeUrl);
+              final HttpSolrServer server = new HttpSolrServer(mergeUrl);
+              try {
+                CoreAdminRequest.MergeIndexes mergeRequest = new CoreAdminRequest.MergeIndexes();
+                mergeRequest.setCoreName(name);
+                mergeRequest.setIndexDirs(Arrays.asList(dir.getPath().toString() + "/data/index"));
+                mergeRequest.process(server);
+                req.success = true;
+              } catch (SolrServerException e) {
+                req.e = e; // success stays false; the caller logs this exception
+              } catch (IOException e) {
+                req.e = e; // success stays false; the caller logs this exception
+              } finally {
+                server.shutdown();
+              }
+              return req;
+            }
+          };
+          pending.add(completionService.submit(task));
+        }
+      }
+
+      // Collect the merge results in completion order and give up on the first
+      // failure; the finally block below cancels whatever is still running.
+      while (!pending.isEmpty()) {
+        try {
+          Future<Request> future = completionService.take();
+          pending.remove(future);
+          Request req = future.get();
+          if (!req.success) {
+            LOG.error("A live merge command failed", req.e);
+            return false;
+          }
+        } catch (ExecutionException e) {
+          LOG.error("Error sending live merge command", e);
+          return false;
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          LOG.error("Live merge process interrupted", e);
+          return false;
+        }
+      }
+
+      // all merges succeeded: commit, via ZooKeeper if configured, else per shard URL
+      try {
+        LOG.info("Committing live merge...");
+        if (options.zkHost != null) {
+          CloudSolrServer server = new CloudSolrServer(options.zkHost);
+          try {
+            server.setDefaultCollection(options.collection);
+            server.commit();
+          } finally {
+            server.shutdown(); // release the client even if the commit fails
+          }
+        } else {
+          for (List<String> urls : options.shardUrls) {
+            for (String url : urls) {
+              // TODO: we should do these concurrently
+              HttpSolrServer server = new HttpSolrServer(url);
+              try {
+                server.commit();
+              } finally {
+                server.shutdown(); // release the client even if the commit fails
+              }
+            }
+          }
+        }
+        LOG.info("Done committing live merge");
+      } catch (Exception e) {
+        LOG.error("Error sending commits to live Solr cluster", e);
+        return false;
+      }
+
+      success = true;
+      return true;
+    } finally {
+      shutdownNowAndAwaitTermination(executor);
+      float secs = (System.currentTimeMillis() - start) / 1000.0f;
+      LOG.info("Live merging of index shards into Solr cluster took " + secs + " secs");
+      if (success) {
+        LOG.info("Live merging completed successfully");
+      } else {
+        LOG.info("Live merging failed");
+      }
+    }
+
+    // if an output dir does not exist, we should fail and do no merge?
+  }
+
+  private void shutdownNowAndAwaitTermination(ExecutorService pool) {
+    pool.shutdown(); // Disable new tasks from being submitted
+    boolean interrupted = false;
+    boolean shutdown = false;
+    while (!shutdown) {
+      pool.shutdownNow(); // Cancel currently executing tasks
+      try {
+        shutdown = pool.awaitTermination(5, TimeUnit.SECONDS);
+      } catch (InterruptedException ie) {
+        // do not re-interrupt yet: awaitTermination() would then throw at once and this loop would spin
+        interrupted = true;
+      }
+    }
+    if (interrupted) {
+      Thread.currentThread().interrupt(); // Preserve interrupt status
+    }
+  }
+
+
+  private static final class Request {
+    Exception e; // set when the merge request failed with an exception
+    boolean success = false; // set to true once the merge request was processed
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
new file mode 100644
index 00000000000..c9eaef6c9e9
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+
+/**
+ * Names of the Solr fields that carry the metadata of an HDFS file.
+ */
+public interface HdfsFileFieldNames {
+
+  String FILE_UPLOAD_URL = "file_upload_url";
+  String FILE_DOWNLOAD_URL = "file_download_url";
+  String FILE_SCHEME = "file_scheme";
+  String FILE_HOST = "file_host";
+  String FILE_PORT = "file_port";
+  String FILE_PATH = "file_path";
+  String FILE_NAME = "file_name";
+  String FILE_LENGTH = "file_length";
+  String FILE_LAST_MODIFIED = "file_last_modified";
+  String FILE_OWNER = "file_owner";
+  String FILE_GROUP = "file_group";
+  String FILE_PERMISSIONS_USER = "file_permissions_user";
+  String FILE_PERMISSIONS_GROUP = "file_permissions_group";
+  String FILE_PERMISSIONS_OTHER = "file_permissions_other";
+  String FILE_PERMISSIONS_STICKYBIT = "file_permissions_stickybit";
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java
new file mode 100644
index 00000000000..229235b96b6
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.util.Locale;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.apache.hadoop.util.Progressable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class runs a background thread that once every 60 seconds checks to see if
+ * a progress report is needed. If a report is needed it is issued.
+ *
+ * A simple counter {@link #threadsNeedingHeartBeat} handles the number of
+ * threads requesting a heart beat.
+ *
+ * The expected usage pattern is
+ *
+ * <pre>
+ * try {
+ *   heartBeater.needHeartBeat();
+ *   do something that may take a while
+ * } finally {
+ *   heartBeater.cancelHeartBeat();
+ * }
+ * </pre>
+ *
+ * Call {@link #close()} to make the background thread exit.
+ */
+public class HeartBeater extends Thread {
+
+  public static Logger LOG = LoggerFactory.getLogger(HeartBeater.class);
+
+  /**
+   * count of threads asking for heart beat, at 0 no heart beat done. This could
+   * be an atomic long but then mismatches in need/cancel could result in
+   * negative counts.
+   */
+  private volatile int threadsNeedingHeartBeat = 0;
+
+  private volatile Progressable progress; // volatile: setProgress() writes it without the lock
+
+  /**
+   * The amount of time to wait between checks for the need to issue a heart
+   * beat. In milliseconds.
+   */
+  private final long waitTimeMs = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS);
+
+  private final CountDownLatch isClosing = new CountDownLatch(1);
+
+  /**
+   * Create the heart beat object thread set it to daemon priority and start the
+   * thread. When the count in {@link #threadsNeedingHeartBeat} is positive, the
+   * heart beat will be issued on the progress object every 60 seconds.
+   */
+  public HeartBeater(Progressable progress) {
+    setDaemon(true);
+    this.progress = progress;
+    LOG.info("Heart beat reporting class is " + progress.getClass().getName());
+    start();
+  }
+
+  public Progressable getProgress() {
+    return progress;
+  }
+
+  public void setProgress(Progressable progress) {
+    this.progress = progress;
+  }
+
+  @Override
+  public void run() {
+    LOG.info("HeartBeat thread running");
+    while (true) {
+      try {
+        synchronized (this) {
+          if (threadsNeedingHeartBeat > 0) {
+            progress.progress();
+            if (LOG.isInfoEnabled()) {
+              LOG.info(String.format(Locale.ENGLISH, "Issuing heart beat for %d threads",
+                  threadsNeedingHeartBeat));
+            }
+          } else if (LOG.isInfoEnabled()) {
+            LOG.info(String.format(Locale.ENGLISH, "heartbeat skipped count %d",
+                threadsNeedingHeartBeat));
+          }
+        }
+      } catch (Throwable e) {
+        LOG.error("HeartBeat throwable", e);
+      }
+      // Wait outside the try block above: a progress object that keeps failing must
+      // not turn this loop into a busy spin that never reaches the close() check.
+      try {
+        if (isClosing.await(waitTimeMs, TimeUnit.MILLISECONDS)) {
+          return;
+        }
+      } catch (InterruptedException e) {
+        LOG.error("HeartBeat throwable", e);
+      }
+    }
+  }
+
+  /**
+   * inform the background thread that heartbeats are to be issued. Issue a
+   * heart beat also
+   */
+  public synchronized void needHeartBeat() {
+    threadsNeedingHeartBeat++;
+    // Issue a progress report right away, just in case the cancel comes before
+    // the background thread issues a report.
+    // If enough cases like this happen the 600 second timeout can occur
+    progress.progress();
+  }
+
+  /**
+   * inform the background thread that this heartbeat request is not needed.
+   * This must be called at some point after each {@link #needHeartBeat()}
+   * request.
+   */
+  public synchronized void cancelHeartBeat() {
+    if (threadsNeedingHeartBeat > 0) {
+      threadsNeedingHeartBeat--;
+    } else {
+      // the exception is created only to get the caller's stack trace into the log
+      LOG.warn("extra call to cancelHeartBeat", new Exception("Dummy"));
+    }
+  }
+
+  public void setStatus(String status) {
+    Progressable current = progress; // read once: setProgress() may replace it concurrently
+    if (current instanceof TaskInputOutputContext) {
+      ((TaskInputOutputContext<?,?,?,?>) current).setStatus(status);
+    }
+  }
+
+  /** Releases any resources */
+  public void close() {
+    isClosing.countDown();
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
new file mode 100644
index 00000000000..5d65fa306df
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * MR Mapper that randomizes a list of URLs.
+ *
+ * Mapper input is (offset, URL) pairs. Each such pair indicates a file to
+ * index.
+ *
+ * Mapper output is (randomPosition, URL) pairs. The reducer receives these
+ * pairs sorted by randomPosition.
+ */
+public class LineRandomizerMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
+
+  private Random random;
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerMapper.class);
+
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    random = createRandom(context);
+  }
+
+  @Override
+  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+    LOGGER.debug("map key: {}, value: {}", key, value);
+    context.write(new LongWritable(random.nextLong()), value);
+  }
+
+  private Random createRandom(Context context) {
+    long taskId = 0;
+    if (context.getTaskAttemptID() != null) { // MRUnit returns null
+      LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId());
+      LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}", context.getTaskAttemptID().getTaskID().getId());
+      taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N
+    }
+    // create a good random seed, yet ensure deterministic PRNG sequence for easy reproducibility
+    return new Random(421439783L * (taskId + 1));
+  }
+
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
new file mode 100644
index 00000000000..af7759e9f90
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * MR Reducer that randomizes a list of URLs.
+ *
+ * Reducer input is (randomPosition, URL) pairs. Each such pair indicates a file
+ * to index.
+ *
+ * Reducer output is a list of URLs, each URL in a random position.
+ */
+public class LineRandomizerReducer extends Reducer<LongWritable, Text, Text, NullWritable> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerReducer.class);
+
+  @Override
+  protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+    for (Text value : values) {
+      LOGGER.debug("reduce key: {}, value: {}", key, value);
+      context.write(value, NullWritable.get());
+    }
+  }
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java
new file mode 100644
index 00000000000..e0e3e62709f
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java
@@ -0,0 +1,1300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import net.sourceforge.argparse4j.ArgumentParsers;
+import net.sourceforge.argparse4j.impl.Arguments;
+import net.sourceforge.argparse4j.impl.action.HelpArgumentAction;
+import net.sourceforge.argparse4j.impl.choice.RangeArgumentChoice;
+import net.sourceforge.argparse4j.impl.type.FileArgumentType;
+import net.sourceforge.argparse4j.inf.Argument;
+import net.sourceforge.argparse4j.inf.ArgumentGroup;
+import net.sourceforge.argparse4j.inf.ArgumentParser;
+import net.sourceforge.argparse4j.inf.ArgumentParserException;
+import net.sourceforge.argparse4j.inf.FeatureControl;
+import net.sourceforge.argparse4j.inf.Namespace;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.PropertyConfigurator;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver;
+import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
+import org.apache.solr.hadoop.morphline.MorphlineMapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.cdk.morphline.base.Fields;
+
+
+/**
+ * Public API for a MapReduce batch job driver that creates a set of Solr index shards from a set of
+ * input files and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner.
+ * Also supports merging the output shards into a set of live customer facing Solr servers,
+ * typically a SolrCloud.
+ */
+public class MapReduceIndexerTool extends Configured implements Tool {
+
+ Job job; // visible for testing only; assigned in run(Options) from Job.getInstance(getConf())
+ // Name of the child path below the --output-dir that run(Options) uses as its results directory.
+ public static final String RESULTS_DIR = "results";
+ // Configuration key: run(Options) randomizes in main memory when the file count is below this value (default 100001).
+ static final String MAIN_MEMORY_RANDOMIZATION_THRESHOLD =
+ MapReduceIndexerTool.class.getName() + ".mainMemoryRandomizationThreshold";
+ // File name (below the "tmp1" step directory) of the list of input files that run(Options) builds for the mappers.
+ private static final String FULL_INPUT_LIST = "full-input-list.txt";
+
+ private static final Logger LOG = LoggerFactory.getLogger(MapReduceIndexerTool.class);
+
+
+ /**
+ * Command line parser of this tool, built on argparse4j. See http://argparse4j.sourceforge.net and for details see http://argparse4j.sourceforge.net/usage.html
+ */
+ static final class MyArgumentParser {
+
+ /**
+ * Parses the given command line arguments (an empty array is treated as "--help") and stores the parsed values in opts.
+ * conf is only handed to the PathArgumentType instances; if --log4j is given, log4j is reconfigured from that file.
+ * @return exitCode null indicates the caller shall proceed with processing,
+ * non-null indicates the caller shall exit the program with the
+ * given exit status code (0 after --help or when no input was specified, 1 on argument errors).
+ */
+ public Integer parseArgs(String[] args, Configuration conf, Options opts) {
+ assert args != null;
+ assert conf != null;
+ assert opts != null;
+ // No arguments at all: behave as if --help had been given.
+ if (args.length == 0) {
+ args = new String[] { "--help" };
+ }
+
+ ArgumentParser parser = ArgumentParsers
+ .newArgumentParser("hadoop [GenericOptions]... jar search-mr-*-job.jar " + MapReduceIndexerTool.class.getName(), false)
+ .defaultHelp(true)
+ .description(
+ "MapReduce batch job driver that takes a morphline and creates a set of Solr index shards from a set of input files " +
+ "and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. " +
+ "It also supports merging the output shards into a set of live customer facing Solr servers, " +
+ "typically a SolrCloud. The program proceeds in several consecutive MapReduce based phases, as follows:" +
+ "\n\n" +
+ "1) Randomization phase: This (parallel) phase randomizes the list of input files in order to spread " +
+ "indexing load more evenly among the mappers of the subsequent phase." +
+ "\n\n" +
+ "2) Mapper phase: This (parallel) phase takes the input files, extracts the relevant content, transforms it " +
+ "and hands SolrInputDocuments to a set of reducers. " +
+ "The ETL functionality is flexible and " +
+ "customizable using chains of arbitrary morphline commands that pipe records from one transformation command to another. " +
+ "Commands to parse and transform a set of standard data formats such as Avro, CSV, Text, HTML, XML, " +
+ "PDF, Word, Excel, etc. are provided out of the box, and additional custom commands and parsers for additional " +
+ "file or data formats can be added as morphline plugins. " +
+ "This is done by implementing a simple Java interface that consumes a record (e.g. a file in the form of an InputStream " +
+ "plus some headers plus contextual metadata) and generates as output zero or more records. " +
+ "Any kind of data format can be indexed and any Solr documents for any kind of Solr schema can be generated, " +
+ "and any custom ETL logic can be registered and executed.\n" +
+ "Record fields, including MIME types, can also explicitly be passed by force from the CLI to the morphline, for example: " +
+ "hadoop ... -D " + MorphlineMapRunner.MORPHLINE_FIELD_PREFIX + Fields.ATTACHMENT_MIME_TYPE + "=text/csv" +
+ "\n\n" +
+ "3) Reducer phase: This (parallel) phase loads the mapper's SolrInputDocuments into one EmbeddedSolrServer per reducer. " +
+ "Each such reducer and Solr server can be seen as a (micro) shard. The Solr servers store their " +
+ "data in HDFS." +
+ "\n\n" +
+ "4) Mapper-only merge phase: This (parallel) phase merges the set of reducer shards into the number of solr " +
+ "shards expected by the user, using a mapper-only job. This phase is omitted if the number " +
+ "of shards is already equal to the number of shards expected by the user. " +
+ "\n\n" +
+ "5) Go-live phase: This optional (parallel) phase merges the output shards of the previous phase into a set of " +
+ "live customer facing Solr servers, typically a SolrCloud. " +
+ "If this phase is omitted you can explicitly point each Solr server to one of the HDFS output shard directories." +
+ "\n\n" +
+ "Fault Tolerance: Mapper and reducer task attempts are retried on failure per the standard MapReduce semantics. " +
+ "On program startup all data in the --output-dir is deleted if that output directory already exists. " +
+ "If the whole job fails you can retry simply by rerunning the program again using the same arguments."
+ );
+ // Custom --help action: prints the parser help, the generic command usage and usage examples, then aborts parsing.
+ parser.addArgument("--help", "-help", "-h")
+ .help("Show this help message and exit")
+ .action(new HelpArgumentAction() {
+ @Override
+ public void run(ArgumentParser parser, Argument arg, Map attrs, String flag, Object value) throws ArgumentParserException {
+ try {
+ parser.printHelp(new PrintWriter(new OutputStreamWriter(System.out, "UTF-8")));
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Won't Happen for UTF-8");
+ }
+ System.out.println();
+ System.out.print(ToolRunnerHelpFormatter.getGenericCommandUsage());
+ //ToolRunner.printGenericCommandUsage(System.out);
+ System.out.println(
+ "Examples: \n\n" +
+
+ "# (Re)index an Avro based Twitter tweet file:\n" +
+ "sudo -u hdfs hadoop \\\n" +
+ " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
+ " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
+// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
+ " --log4j src/test/resources/log4j.properties \\\n" +
+ " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" +
+ " --solr-home-dir src/test/resources/solr/minimr \\\n" +
+ " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" +
+ " --shards 1 \\\n" +
+ " hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n" +
+ "\n" +
+ "# (Re)index all files that match all of the following conditions:\n" +
+ "# 1) File is contained in dir tree hdfs:///user/$USER/solrloadtest/twitter/tweets\n" +
+ "# 2) file name matches the glob pattern 'sample-statuses*.gz'\n" +
+ "# 3) file was last modified less than 100000 minutes ago\n" +
+ "# 4) file size is between 1 MB and 1 GB\n" +
+ "# Also include extra library jar file containing JSON tweet Java parser:\n" +
+ "hadoop jar target/search-mr-*-job.jar " + "com.cloudera.cdk.morphline.hadoop.find.HdfsFindTool" + " \\\n" +
+ " -find hdfs:///user/$USER/solrloadtest/twitter/tweets \\\n" +
+ " -type f \\\n" +
+ " -name 'sample-statuses*.gz' \\\n" +
+ " -mmin -1000000 \\\n" +
+ " -size -100000000c \\\n" +
+ " -size +1000000c \\\n" +
+ "| sudo -u hdfs hadoop \\\n" +
+ " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
+ " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
+// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
+ " --log4j src/test/resources/log4j.properties \\\n" +
+ " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadJsonTestTweets.conf \\\n" +
+ " --solr-home-dir src/test/resources/solr/minimr \\\n" +
+ " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" +
+ " --shards 100 \\\n" +
+ " --input-list -\n" +
+ "\n" +
+ "# Go live by merging resulting index shards into a live Solr cluster\n" +
+ "# (explicitly specify Solr URLs - for a SolrCloud cluster see next example):\n" +
+ "sudo -u hdfs hadoop \\\n" +
+ " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
+ " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
+// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
+ " --log4j src/test/resources/log4j.properties \\\n" +
+ " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" +
+ " --solr-home-dir src/test/resources/solr/minimr \\\n" +
+ " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" +
+ " --shard-url http://solr001.mycompany.com:8983/solr/collection1 \\\n" +
+ " --shard-url http://solr002.mycompany.com:8983/solr/collection1 \\\n" +
+ " --go-live \\\n" +
+ " hdfs:///user/foo/indir\n" +
+ "\n" +
+ "# Go live by merging resulting index shards into a live SolrCloud cluster\n" +
+ "# (discover shards and Solr URLs through ZooKeeper):\n" +
+ "sudo -u hdfs hadoop \\\n" +
+ " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
+ " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
+// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
+ " --log4j src/test/resources/log4j.properties \\\n" +
+ " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" +
+ " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" +
+ " --zk-host zk01.mycompany.com:2181/solr \\\n" +
+ " --collection collection1 \\\n" +
+ " --go-live \\\n" +
+ " hdfs:///user/foo/indir\n"
+ );
+ throw new FoundHelpArgument(); // Trick to prevent processing of any remaining arguments
+ }
+ });
+
+ ArgumentGroup requiredGroup = parser.addArgumentGroup("Required arguments");
+
+ Argument outputDirArg = requiredGroup.addArgument("--output-dir")
+ .metavar("HDFS_URI")
+ .type(new PathArgumentType(conf) {
+ @Override
+ public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException {
+ Path path = super.convert(parser, arg, value);
+ if ("hdfs".equals(path.toUri().getScheme()) && path.toUri().getAuthority() == null) {
+ // TODO: consider defaulting to hadoop's fs.default.name here or in SolrRecordWriter.createEmbeddedSolrServer()
+ throw new ArgumentParserException("Missing authority in path URI: " + path, parser);
+ }
+ return path;
+ }
+ }.verifyHasScheme().verifyIsAbsolute().verifyCanWriteParent())
+ .required(true)
+ .help("HDFS directory to write Solr indexes to. Inside there one output directory per shard will be generated. " +
+ "Example: hdfs://c2202.mycompany.com/user/$USER/test");
+
+ Argument inputListArg = parser.addArgument("--input-list")
+ .action(Arguments.append())
+ .metavar("URI")
+ // .type(new PathArgumentType(fs).verifyExists().verifyCanRead())
+ .type(Path.class)
+ .help("Local URI or HDFS URI of a UTF-8 encoded file containing a list of HDFS URIs to index, " +
+ "one URI per line in the file. If '-' is specified, URIs are read from the standard input. " +
+ "Multiple --input-list arguments can be specified.");
+
+ Argument morphlineFileArg = requiredGroup.addArgument("--morphline-file")
+ .metavar("FILE")
+ .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
+ .required(true)
+ .help("Relative or absolute path to a local config file that contains one or more morphlines. " +
+ "The file must be UTF-8 encoded. Example: /path/to/morphline.conf");
+
+ Argument morphlineIdArg = parser.addArgument("--morphline-id")
+ .metavar("STRING")
+ .type(String.class)
+ .help("The identifier of the morphline that shall be executed within the morphline config file " +
+ "specified by --morphline-file. If the --morphline-id option is ommitted the first (i.e. " +
+ "top-most) morphline within the config file is used. Example: morphline1");
+
+ Argument solrHomeDirArg = parser.addArgument("--solr-home-dir")
+ .metavar("DIR")
+ .type(new FileArgumentType() {
+ @Override
+ public File convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException {
+ File solrHomeDir = super.convert(parser, arg, value);
+ File solrConfigFile = new File(new File(solrHomeDir, "conf"), "solrconfig.xml");
+ new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead().convert(
+ parser, arg, solrConfigFile.getPath());
+ return solrHomeDir;
+ }
+ }.verifyIsDirectory().verifyCanRead())
+ .required(false)
+ .help("Relative or absolute path to a local dir containing Solr conf/ dir and in particular " +
+ "conf/solrconfig.xml and optionally also lib/ dir. This directory will be uploaded to each MR task. " +
+ "Example: src/test/resources/solr/minimr");
+
+ Argument updateConflictResolverArg = parser.addArgument("--update-conflict-resolver")
+ .metavar("FQCN")
+ .type(String.class)
+ .setDefault(RetainMostRecentUpdateConflictResolver.class.getName())
+ .help("Fully qualified class name of a Java class that implements the UpdateConflictResolver interface. " +
+ "This enables deduplication and ordering of a series of document updates for the same unique document " +
+ "key. For example, a MapReduce batch job might index multiple files in the same job where some of the " +
+ "files contain old and new versions of the very same document, using the same unique document key.\n" +
+ "Typically, implementations of this interface forbid collisions by throwing an exception, or ignore all but " +
+ "the most recent document version, or, in the general case, order colliding updates ascending from least " +
+ "recent to most recent (partial) update. The caller of this interface (i.e. the Hadoop Reducer) will then " +
+ "apply the updates to Solr in the order returned by the orderUpdates() method.\n" +
+ "The default RetainMostRecentUpdateConflictResolver implementation ignores all but the most recent document " +
+ "version, based on a configurable numeric Solr field, which defaults to the file_last_modified timestamp");
+
+ Argument mappersArg = parser.addArgument("--mappers")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer
+ .setDefault(-1)
+ .help("Tuning knob that indicates the maximum number of MR mapper tasks to use. -1 indicates use all map slots " +
+ "available on the cluster.");
+
+ Argument reducersArg = parser.addArgument("--reducers")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer
+ .setDefault(-1)
+ .help("Tuning knob that indicates the number of reducers to index into. " +
+ "-1 indicates use all reduce slots available on the cluster. " +
+ "0 indicates use one reducer per output shard, which disables the mtree merge MR algorithm. " +
+ "The mtree merge MR algorithm improves scalability by spreading load " +
+ "(in particular CPU load) among a number of parallel reducers that can be much larger than the number " +
+ "of solr shards expected by the user. It can be seen as an extension of concurrent lucene merges " +
+ "and tiered lucene merges to the clustered case. The subsequent mapper-only phase " +
+ "merges the output of said large number of reducers to the number of shards expected by the user, " +
+ "again by utilizing more available parallelism on the cluster.");
+
+ Argument fanoutArg = parser.addArgument("--fanout")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(2, Integer.MAX_VALUE))
+ .setDefault(Integer.MAX_VALUE)
+ .help(FeatureControl.SUPPRESS); // NOTE(review): presumably hides --fanout from the printed help - confirm against the argparse4j docs
+
+ Argument maxSegmentsArg = parser.addArgument("--max-segments")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
+ .setDefault(1)
+ .help("Tuning knob that indicates the maximum number of segments to be contained on output in the index of " +
+ "each reducer shard. After a reducer has built its output index it applies a merge policy to merge segments " +
+ "until there are <= maxSegments lucene segments left in this index. " +
+ "Merging segments involves reading and rewriting all data in all these segment files, " +
+ "potentially multiple times, which is very I/O intensive and time consuming. " +
+ "However, an index with fewer segments can later be merged faster, " +
+ "and it can later be queried faster once deployed to a live Solr serving shard. " +
+ "Set maxSegments to 1 to optimize the index for low query latency. " +
+ "In a nutshell, a small maxSegments value trades indexing latency for subsequently improved query latency. " +
+ "This can be a reasonable trade-off for batch indexing systems.");
+
+ Argument fairSchedulerPoolArg = parser.addArgument("--fair-scheduler-pool")
+ .metavar("STRING")
+ .help("Optional tuning knob that indicates the name of the fair scheduler pool to submit jobs to. " +
+ "The Fair Scheduler is a pluggable MapReduce scheduler that provides a way to share large clusters. " +
+ "Fair scheduling is a method of assigning resources to jobs such that all jobs get, on average, an " +
+ "equal share of resources over time. When there is a single job running, that job uses the entire " +
+ "cluster. When other jobs are submitted, tasks slots that free up are assigned to the new jobs, so " +
+ "that each job gets roughly the same amount of CPU time. Unlike the default Hadoop scheduler, which " +
+ "forms a queue of jobs, this lets short jobs finish in reasonable time while not starving long jobs. " +
+ "It is also an easy way to share a cluster between multiple of users. Fair sharing can also work with " +
+ "job priorities - the priorities are used as weights to determine the fraction of total compute time " +
+ "that each job gets.");
+
+ Argument dryRunArg = parser.addArgument("--dry-run")
+ .action(Arguments.storeTrue())
+ .help("Run in local mode and print documents to stdout instead of loading them into Solr. This executes " +
+ "the morphline in the client process (without submitting a job to MR) for quicker turnaround during " +
+ "early trial & debug sessions.");
+
+ Argument log4jConfigFileArg = parser.addArgument("--log4j")
+ .metavar("FILE")
+ .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead())
+ .help("Relative or absolute path to a log4j.properties config file on the local file system. This file " +
+ "will be uploaded to each MR task. Example: /path/to/log4j.properties");
+
+ Argument verboseArg = parser.addArgument("--verbose", "-v")
+ .action(Arguments.storeTrue())
+ .help("Turn on verbose output.");
+
+ ArgumentGroup clusterInfoGroup = parser
+ .addArgumentGroup("Cluster arguments")
+ .description(
+ "Arguments that provide information about your Solr cluster. "
+ + "If you are not using --go-live, pass the --shards argument. If you are building shards for "
+ + "a Non-SolrCloud cluster, pass the --shard-url argument one or more times. To build indexes for"
+ + " a replicated cluster with --shard-url, pass replica urls consecutively and also pass --shards. "
+ + "If you are building shards for a SolrCloud cluster, pass the --zk-host argument. "
+ + "Using --go-live requires either --shard-url or --zk-host.");
+
+ Argument shardUrlsArg = clusterInfoGroup.addArgument("--shard-url")
+ .metavar("URL")
+ .type(String.class)
+ .action(Arguments.append())
+ .help("Solr URL to merge resulting shard into if using --go-live. " +
+ "Example: http://solr001.mycompany.com:8983/solr/collection1. " +
+ "Multiple --shard-url arguments can be specified, one for each desired shard. " +
+ "If you are merging shards into a SolrCloud cluster, use --zk-host instead.");
+
+ Argument zkHostArg = clusterInfoGroup.addArgument("--zk-host")
+ .metavar("STRING")
+ .type(String.class)
+ .help("The address of a ZooKeeper ensemble being used by a SolrCloud cluster. "
+ + "This ZooKeeper ensemble will be examined to determine the number of output "
+ + "shards to create as well as the Solr URLs to merge the output shards into when using the --go-live option. "
+ + "Requires that you also pass the --collection to merge the shards into.\n"
+ + "\n"
+ + "The --zk-host option implements the same partitioning semantics as the standard SolrCloud "
+ + "Near-Real-Time (NRT) API. This enables to mix batch updates from MapReduce ingestion with "
+ + "updates from standard Solr NRT ingestion on the same SolrCloud cluster, "
+ + "using identical unique document keys.\n"
+ + "\n"
+ + "Format is: a list of comma separated host:port pairs, each corresponding to a zk "
+ + "server. Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183' If "
+ + "the optional chroot suffix is used the example would look "
+ + "like: '127.0.0.1:2181/solr,127.0.0.1:2182/solr,127.0.0.1:2183/solr' "
+ + "where the client would be rooted at '/solr' and all paths "
+ + "would be relative to this root - i.e. getting/setting/etc... "
+ + "'/foo/bar' would result in operations being run on "
+ + "'/solr/foo/bar' (from the server perspective).\n"
+ + "\n"
+ + "If --solr-home-dir is not specified, the Solr home directory for the collection "
+ + "will be downloaded from this ZooKeeper ensemble.");
+
+ Argument shardsArg = clusterInfoGroup.addArgument("--shards")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
+ .help("Number of output shards to generate.");
+
+ ArgumentGroup goLiveGroup = parser.addArgumentGroup("Go live arguments")
+ .description("Arguments for merging the shards that are built into a live Solr cluster. " +
+ "Also see the Cluster arguments.");
+
+ Argument goLiveArg = goLiveGroup.addArgument("--go-live")
+ .action(Arguments.storeTrue())
+ .help("Allows you to optionally merge the final index shards into a live Solr cluster after they are built. " +
+ "You can pass the ZooKeeper address with --zk-host and the relevant cluster information will be auto detected. " +
+ "If you are not using a SolrCloud cluster, --shard-url arguments can be used to specify each SolrCore to merge " +
+ "each shard into.");
+
+ Argument collectionArg = goLiveGroup.addArgument("--collection")
+ .metavar("STRING")
+ .help("The SolrCloud collection to merge shards into when using --go-live and --zk-host. Example: collection1");
+
+ Argument goLiveThreadsArg = goLiveGroup.addArgument("--go-live-threads")
+ .metavar("INTEGER")
+ .type(Integer.class)
+ .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE))
+ .setDefault(1000)
+ .help("Tuning knob that indicates the maximum number of live merges to run in parallel at one time.");
+
+ // trailing positional arguments
+ Argument inputFilesArg = parser.addArgument("input-files")
+ .metavar("HDFS_URI")
+ .type(new PathArgumentType(conf).verifyHasScheme().verifyExists().verifyCanRead())
+ .nargs("*")
+ .setDefault() // NOTE(review): presumably yields an empty (non-null) list when no input files are given; isEmpty() below relies on it - confirm
+ .help("HDFS URI of file or directory tree to index.");
+ // Parse: FoundHelpArgument means help was already printed (exit 0); a parser error is reported and maps to exit 1.
+ Namespace ns;
+ try {
+ ns = parser.parseArgs(args);
+ } catch (FoundHelpArgument e) {
+ return 0;
+ } catch (ArgumentParserException e) {
+ parser.handleError(e);
+ return 1;
+ }
+ // Apply the --log4j file (if any) before the first log statement below.
+ opts.log4jConfigFile = (File) ns.get(log4jConfigFileArg.getDest());
+ if (opts.log4jConfigFile != null) {
+ PropertyConfigurator.configure(opts.log4jConfigFile.getPath());
+ }
+ LOG.debug("Parsed command line args: {}", ns);
+ // Copy the parsed values into opts; a null --input-list result is replaced by an empty list.
+ opts.inputLists = ns.getList(inputListArg.getDest());
+ if (opts.inputLists == null) {
+ opts.inputLists = Collections.EMPTY_LIST;
+ }
+ opts.inputFiles = ns.getList(inputFilesArg.getDest());
+ opts.outputDir = (Path) ns.get(outputDirArg.getDest());
+ opts.mappers = ns.getInt(mappersArg.getDest());
+ opts.reducers = ns.getInt(reducersArg.getDest());
+ opts.updateConflictResolver = ns.getString(updateConflictResolverArg.getDest());
+ opts.fanout = ns.getInt(fanoutArg.getDest());
+ opts.maxSegments = ns.getInt(maxSegmentsArg.getDest());
+ opts.morphlineFile = (File) ns.get(morphlineFileArg.getDest());
+ opts.morphlineId = ns.getString(morphlineIdArg.getDest());
+ opts.solrHomeDir = (File) ns.get(solrHomeDirArg.getDest());
+ opts.fairSchedulerPool = ns.getString(fairSchedulerPoolArg.getDest());
+ opts.isDryRun = ns.getBoolean(dryRunArg.getDest());
+ opts.isVerbose = ns.getBoolean(verboseArg.getDest());
+ opts.zkHost = ns.getString(zkHostArg.getDest());
+ opts.shards = ns.getInt(shardsArg.getDest());
+ opts.shardUrls = buildShardUrls(ns.getList(shardUrlsArg.getDest()), opts.shards);
+ opts.goLive = ns.getBoolean(goLiveArg.getDest());
+ opts.goLiveThreads = ns.getInt(goLiveThreadsArg.getDest());
+ opts.collection = ns.getString(collectionArg.getDest());
+ // Cross-argument validation (verifyGoLiveArgs is defined outside this nested class); failures map to exit 1.
+ try {
+ verifyGoLiveArgs(opts, parser);
+ } catch (ArgumentParserException e) {
+ parser.handleError(e);
+ return 1;
+ }
+ // Neither --input-list nor positional input files were given: report success without doing any work.
+ if (opts.inputLists.isEmpty() && opts.inputFiles.isEmpty()) {
+ LOG.info("No input files specified - nothing to process");
+ return 0; // nothing to process
+ }
+ return null;
+ }
+
+ /** Marker trick to prevent processing of any remaining arguments once --help option has been parsed; thrown by the --help action and caught in parseArgs */
+ private static final class FoundHelpArgument extends RuntimeException {
+ }
+ }
+ // END OF INNER CLASS
+
+ static List> buildShardUrls(List urls, Integer numShards) {
+ if (urls == null) return null;
+ List> shardUrls = new ArrayList>(urls.size());
+ List list = null;
+
+ int sz;
+ if (numShards == null) {
+ numShards = urls.size();
+ }
+ sz = (int) Math.ceil(urls.size() / (float)numShards);
+ for (int i = 0; i < urls.size(); i++) {
+ if (i % sz == 0) {
+ list = new ArrayList();
+ shardUrls.add(list);
+ }
+ list.add((String) urls.get(i));
+ }
+
+ return shardUrls;
+ }
+
+ static final class Options { // plain holder for the parsed command line; filled in by MyArgumentParser.parseArgs
+   boolean goLive; // --go-live
+   String collection; // --collection
+   String zkHost; // --zk-host
+   Integer goLiveThreads; // --go-live-threads (parser default 1000)
+   List<List<String>> shardUrls; // --shard-url values grouped per shard by buildShardUrls; null when that gets a null list
+   List<Path> inputLists; // --input-list values; parseArgs substitutes an empty list for null
+   List<Path> inputFiles; // trailing positional input-files
+   Path outputDir; // --output-dir
+   int mappers; // --mappers (parser default -1); run(Options) overwrites it with the effective mapper count
+   int reducers; // --reducers (parser default -1)
+   String updateConflictResolver; // --update-conflict-resolver, a fully qualified class name
+   int fanout; // --fanout (parser default Integer.MAX_VALUE)
+   Integer shards; // --shards; no parser default, buildShardUrls treats null as "one URL per shard"
+   int maxSegments; // --max-segments (parser default 1)
+   File morphlineFile; // --morphline-file; run(Options) rejects null
+   String morphlineId; // --morphline-id (optional)
+   File solrHomeDir; // --solr-home-dir (optional)
+   String fairSchedulerPool; // --fair-scheduler-pool; only applied by run(Options) when non-null
+   boolean isDryRun; // --dry-run
+   File log4jConfigFile; // --log4j; only applied when non-null
+   boolean isVerbose; // --verbose / -v
+ }
+ // END OF INNER CLASS
+
+
+ /** API for command line clients: runs this tool through ToolRunner and exits the JVM with the returned status. */
+ public static void main(String[] args) throws Exception {
+   final Configuration conf = new Configuration();
+   System.exit(ToolRunner.run(conf, new MapReduceIndexerTool(), args));
+ }
+
+ public MapReduceIndexerTool() { super(); } // explicit no-arg constructor; initializes nothing itself
+
+ /** Tool entry point: parses the command line and, unless parsing already fixed the exit code, runs the job. */
+ @Override
+ public int run(String[] args) throws Exception {
+   final Options parsed = new Options();
+   final Integer status = new MyArgumentParser().parseArgs(args, getConf(), parsed);
+   // Non-null status: help was shown, parsing failed, or there is nothing to do - return it as is.
+   // Null status: the arguments are usable, so continue with the Options based overload.
+   return (status != null) ? status.intValue() : run(parsed);
+ }
+
+ /** API for Java clients; visible for testing; may become a public API eventually */
+ int run(Options options) throws Exception {
+
+ if ("local".equals(getConf().get("mapred.job.tracker"))) {
+ throw new IllegalStateException(
+ "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported " +
+ "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, " +
+ "which is required for passing files via --files and --libjars");
+ }
+
+ long programStartTime = System.currentTimeMillis();
+ if (options.fairSchedulerPool != null) {
+ getConf().set("mapred.fairscheduler.pool", options.fairSchedulerPool);
+ }
+ getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);
+
+ // switch off a false warning about allegedly not implementing Tool
+ // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
+ // also see https://issues.apache.org/jira/browse/HADOOP-8183
+ getConf().setBoolean("mapred.used.genericoptionsparser", true);
+
+ if (options.log4jConfigFile != null) {
+ Utils.setLogConfigFile(options.log4jConfigFile, getConf());
+ addDistributedCacheFile(options.log4jConfigFile, getConf());
+ }
+
+ job = Job.getInstance(getConf());
+ job.setJarByClass(getClass());
+
+ if (options.morphlineFile == null) {
+ throw new ArgumentParserException("Argument --morphline-file is required", null);
+ }
+ verifyGoLiveArgs(options, null);
+ verifyZKStructure(options, null);
+
+ int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
+ //int mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
+ LOG.info("Cluster reports {} mapper slots", mappers);
+
+ if (options.mappers == -1) {
+ mappers = 8 * mappers; // better accomodate stragglers
+ } else {
+ mappers = options.mappers;
+ }
+ if (mappers <= 0) {
+ throw new IllegalStateException("Illegal number of mappers: " + mappers);
+ }
+ options.mappers = mappers;
+
+ FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
+ if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
+ return -1;
+ }
+ Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
+ Path outputReduceDir = new Path(options.outputDir, "reducers");
+ Path outputStep1Dir = new Path(options.outputDir, "tmp1");
+ Path outputStep2Dir = new Path(options.outputDir, "tmp2");
+ Path outputTreeMergeStep = new Path(options.outputDir, "mtree-merge-output");
+ Path fullInputList = new Path(outputStep1Dir, FULL_INPUT_LIST);
+
+ LOG.debug("Creating list of input files for mappers: {}", fullInputList);
+ long numFiles = addInputFiles(options.inputFiles, options.inputLists, fullInputList, job.getConfiguration());
+ if (numFiles == 0) {
+ LOG.info("No input files found - nothing to process");
+ return 0;
+ }
+ int numLinesPerSplit = (int) ceilDivide(numFiles, mappers);
+ if (numLinesPerSplit < 0) { // numeric overflow from downcasting long to int?
+ numLinesPerSplit = Integer.MAX_VALUE;
+ }
+ numLinesPerSplit = Math.max(1, numLinesPerSplit);
+
+ int realMappers = Math.min(mappers, (int) ceilDivide(numFiles, numLinesPerSplit));
+ calculateNumReducers(options, realMappers);
+ int reducers = options.reducers;
+ LOG.info("Using these parameters: " +
+ "numFiles: {}, mappers: {}, realMappers: {}, reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
+ new Object[] {numFiles, mappers, realMappers, reducers, options.shards, options.fanout, options.maxSegments});
+
+
+ LOG.info("Randomizing list of {} input files to spread indexing load more evenly among mappers", numFiles);
+ long startTime = System.currentTimeMillis();
+ if (numFiles < job.getConfiguration().getInt(MAIN_MEMORY_RANDOMIZATION_THRESHOLD, 100001)) {
+ // If there are few input files reduce latency by directly running main memory randomization
+ // instead of launching a high latency MapReduce job
+ randomizeFewInputFiles(fs, outputStep2Dir, fullInputList);
+ } else {
+ // Randomize using a MapReduce job. Use sequential algorithm below a certain threshold because there's no
+ // benefit in using many parallel mapper tasks just to randomize the order of a few lines each
+ int numLinesPerRandomizerSplit = Math.max(10 * 1000 * 1000, numLinesPerSplit);
+ Job randomizerJob = randomizeManyInputFiles(getConf(), fullInputList, outputStep2Dir, numLinesPerRandomizerSplit);
+ if (!waitForCompletion(randomizerJob, options.isVerbose)) {
+ return -1; // job failed
+ }
+ }
+ float secs = (System.currentTimeMillis() - startTime) / 1000.0f;
+ LOG.info("Done. Randomizing list of {} input files took {} secs", numFiles, secs);
+
+
+ job.setInputFormatClass(NLineInputFormat.class);
+ NLineInputFormat.addInputPath(job, outputStep2Dir);
+ NLineInputFormat.setNumLinesPerSplit(job, numLinesPerSplit);
+ FileOutputFormat.setOutputPath(job, outputReduceDir);
+
+ String mapperClass = job.getConfiguration().get(JobContext.MAP_CLASS_ATTR);
+ if (mapperClass == null) { // enable customization
+ Class clazz = MorphlineMapper.class;
+ mapperClass = clazz.getName();
+ job.setMapperClass(clazz);
+ }
+ job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(mapperClass));
+
+ if (job.getConfiguration().get(JobContext.REDUCE_CLASS_ATTR) == null) { // enable customization
+ job.setReducerClass(SolrReducer.class);
+ }
+ if (options.updateConflictResolver == null) {
+ throw new IllegalArgumentException("updateConflictResolver must not be null");
+ }
+ job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver);
+
+ if (options.zkHost != null) {
+ assert options.collection != null;
+ /*
+ * MapReduce partitioner that partitions the Mapper output such that each
+ * SolrInputDocument gets sent to the SolrCloud shard that it would have
+ * been sent to if the document were ingested via the standard SolrCloud
+ * Near Real Time (NRT) API.
+ *
+ * In other words, this class implements the same partitioning semantics
+ * as the standard SolrCloud NRT API. This enables to mix batch updates
+ * from MapReduce ingestion with updates from standard NRT ingestion on
+ * the same SolrCloud cluster, using identical unique document keys.
+ */
+ if (job.getConfiguration().get(JobContext.PARTITIONER_CLASS_ATTR) == null) { // enable customization
+ job.setPartitionerClass(SolrCloudPartitioner.class);
+ }
+ job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
+ job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
+ }
+ job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);
+
+ job.setOutputFormatClass(SolrOutputFormat.class);
+ if (options.solrHomeDir != null) {
+ SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
+ } else {
+ assert options.zkHost != null;
+ // use the config that this collection uses for the SolrHomeCache.
+ ZooKeeperInspector zki = new ZooKeeperInspector();
+ SolrZkClient zkClient = zki.getZkClient(options.zkHost);
+ try {
+ String configName = zki.readConfigName(zkClient, options.collection);
+ File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
+ SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job);
+ options.solrHomeDir = tmpSolrHomeDir;
+ } finally {
+ zkClient.close();
+ }
+ }
+
+ MorphlineMapRunner runner = setupMorphline(options);
+ if (options.isDryRun && runner != null) {
+ LOG.info("Indexing {} files in dryrun mode", numFiles);
+ startTime = System.currentTimeMillis();
+ dryRun(runner, fs, fullInputList);
+ secs = (System.currentTimeMillis() - startTime) / 1000.0f;
+ LOG.info("Done. Indexing {} files in dryrun mode took {} secs", numFiles, secs);
+ goodbye(null, programStartTime);
+ return 0;
+ }
+ job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName());
+
+ job.setNumReduceTasks(reducers);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(SolrInputDocumentWritable.class);
+ LOG.info("Indexing {} files using {} real mappers into {} reducers", new Object[] {numFiles, realMappers, reducers});
+ startTime = System.currentTimeMillis();
+ if (!waitForCompletion(job, true)) {
+ return -1; // job failed
+ }
+
+ secs = (System.currentTimeMillis() - startTime) / 1000.0f;
+ LOG.info("Done. Indexing {} files using {} real mappers into {} reducers took {} secs", new Object[] {numFiles, realMappers, reducers, secs});
+
+ int mtreeMergeIterations = 0;
+ if (reducers > options.shards) {
+ mtreeMergeIterations = (int) Math.round(log(options.fanout, reducers / options.shards));
+ }
+ LOG.debug("MTree merge iterations to do: {}", mtreeMergeIterations);
+ int mtreeMergeIteration = 1;
+ while (reducers > options.shards) { // run a mtree merge iteration
+ job = Job.getInstance(getConf());
+ job.setJarByClass(getClass());
+ job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(TreeMergeMapper.class));
+ job.setMapperClass(TreeMergeMapper.class);
+ job.setOutputFormatClass(TreeMergeOutputFormat.class);
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(NullWritable.class);
+ job.setInputFormatClass(NLineInputFormat.class);
+
+ Path inputStepDir = new Path(options.outputDir, "mtree-merge-input-iteration" + mtreeMergeIteration);
+ fullInputList = new Path(inputStepDir, FULL_INPUT_LIST);
+ LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}", new Object[] {mtreeMergeIteration, mtreeMergeIterations, fullInputList});
+ numFiles = createTreeMergeInputDirList(outputReduceDir, fs, fullInputList);
+ if (numFiles != reducers) {
+ throw new IllegalStateException("Not same reducers: " + reducers + ", numFiles: " + numFiles);
+ }
+ NLineInputFormat.addInputPath(job, fullInputList);
+ NLineInputFormat.setNumLinesPerSplit(job, options.fanout);
+ FileOutputFormat.setOutputPath(job, outputTreeMergeStep);
+
+ LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}", new Object[] {
+ mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout});
+ startTime = System.currentTimeMillis();
+ if (!waitForCompletion(job, options.isVerbose)) {
+ return -1; // job failed
+ }
+ secs = (System.currentTimeMillis() - startTime) / 1000.0f;
+ LOG.info("MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs",
+ new Object[] {mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout, secs});
+
+ if (!delete(outputReduceDir, true, fs)) {
+ return -1;
+ }
+ if (!rename(outputTreeMergeStep, outputReduceDir, fs)) {
+ return -1;
+ }
+ assert reducers % options.fanout == 0;
+ reducers = reducers / options.fanout;
+ mtreeMergeIteration++;
+ }
+ assert reducers == options.shards;
+
+ // normalize output shard dir prefix, i.e.
+ // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
+ // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
+ for (FileStatus stats : fs.listStatus(outputReduceDir)) {
+ String dirPrefix = SolrOutputFormat.getOutputName(job);
+ Path srcPath = stats.getPath();
+ if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
+ String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
+ Path dstPath = new Path(srcPath.getParent(), dstName);
+ if (!rename(srcPath, dstPath, fs)) {
+ return -1;
+ }
+ }
+ };
+
+ // publish results dir
+ if (!rename(outputReduceDir, outputResultsDir, fs)) {
+ return -1;
+ }
+
+ if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(outputResultsDir, fs))) {
+ return -1;
+ }
+
+ goodbye(job, programStartTime);
+ return 0;
+ }
+
+ private void calculateNumReducers(Options options, int realMappers) throws IOException {
+   if (options.shards <= 0) {
+     throw new IllegalStateException("Illegal number of shards: " + options.shards);
+   }
+   if (options.fanout <= 1) {
+     throw new IllegalStateException("Illegal fanout: " + options.fanout);
+   }
+   if (realMappers <= 0) {
+     throw new IllegalStateException("Illegal realMappers: " + realMappers);
+   }
+
+   // Ask the cluster for its reduce capacity; options.reducers decides below whether it is used.
+   int clusterSlots = new JobClient(job.getConfiguration()).getClusterStatus().getMaxReduceTasks(); // MR1
+   //clusterSlots = job.getCluster().getClusterStatus().getReduceSlotCapacity(); // Yarn only
+   LOG.info("Cluster reports {} reduce slots", clusterSlots);
+
+   final int requested = options.reducers;
+   int numReducers = requested; // any value other than the two sentinels below is taken as-is
+   if (requested == 0) {
+     numReducers = options.shards; // 0 selects one reducer per shard
+   } else if (requested == -1) {
+     numReducers = Math.min(clusterSlots, realMappers); // no need to use many reducers when using few mappers
+   }
+   numReducers = Math.max(numReducers, options.shards); // never fewer reducers than shards
+
+   if (numReducers != options.shards) {
+     // Clamp fanout: it can't meaningfully be larger than what would be required to merge
+     // all leaf shards into root shards in one single tree merge iteration.
+     options.fanout = Math.min(options.fanout, (int) ceilDivide(numReducers, options.shards));
+
+     // Round up so that numReducers == options.shards * (fanout ^ N) for some integer N >= 1,
+     // where N is the number of mtree merge iterations. This helps to evenly spread docs
+     // among root shards and simplifies the impl of the mtree merge algorithm.
+     int rounded = options.shards;
+     while (rounded < numReducers) {
+       rounded *= options.fanout;
+     }
+     numReducers = rounded;
+     assert numReducers % options.fanout == 0;
+   }
+   options.reducers = numReducers;
+ }
+
+ private long addInputFiles(List<Path> inputFiles, List<Path> inputLists, Path fullInputList, Configuration conf)
+     throws IOException {
+   // Writes one path per line to fullInputList and returns how many lines were written.
+   long numFiles = 0;
+   FileSystem fs = fullInputList.getFileSystem(conf);
+   FSDataOutputStream out = fs.create(fullInputList);
+   try {
+     Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
+
+     for (Path inputFile : inputFiles) { // explicit files/dirs: expanded recursively, missing ones are skipped
+       FileSystem inputFileFs = inputFile.getFileSystem(conf);
+       if (inputFileFs.exists(inputFile)) {
+         PathFilter pathFilter = new PathFilter() {
+           @Override
+           public boolean accept(Path path) {
+             return !path.getName().startsWith("."); // ignore "hidden" files and dirs
+           }
+         };
+         numFiles += addInputFilesRecursively(inputFile, writer, inputFileFs, pathFilter);
+       }
+     }
+
+     for (Path inputList : inputLists) { // list files: every line is copied through verbatim
+       InputStream in;
+       if (inputList.toString().equals("-")) {
+         in = System.in; // "-" reads the list from standard input
+       } else if (inputList.isAbsoluteAndSchemeAuthorityNull()) {
+         in = new BufferedInputStream(new FileInputStream(inputList.toString())); // opened via java.io
+       } else {
+         in = inputList.getFileSystem(conf).open(inputList); // opened via the path's Hadoop FileSystem
+       }
+       try {
+         BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+         String line;
+         while ((line = reader.readLine()) != null) {
+           writer.write(line + "\n");
+           numFiles++;
+         }
+         reader.close();
+       } finally {
+         in.close();
+       }
+     }
+
+     writer.close(); // flushes the buffered lines into out before out is closed below
+   } finally {
+     out.close();
+   }
+   return numFiles;
+ }
+
+ /**
+  * Writes each file at or below path to writer, one per line; returns how many were written.
+  */
+ private long addInputFilesRecursively(Path path, Writer writer, FileSystem fs, PathFilter pathFilter) throws IOException {
+   long count = 0;
+   for (FileStatus entry : fs.listStatus(path, pathFilter)) {
+     Path entryPath = entry.getPath();
+     LOG.debug("Adding path {}", entryPath);
+     if (!entry.isDirectory()) {
+       writer.write(entryPath.toString() + "\n");
+       count++;
+     } else {
+       count += addInputFilesRecursively(entryPath, writer, fs, pathFilter);
+     }
+   }
+   return count;
+ }
+
+ private void randomizeFewInputFiles(FileSystem fs, Path outputStep2Dir, Path fullInputList) throws IOException {
+   List<String> lines = new ArrayList<String>(); // the whole input list is held in memory
+   BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8"));
+   try {
+     String line;
+     while ((line = reader.readLine()) != null) {
+       lines.add(line);
+     }
+   } finally {
+     reader.close();
+   }
+   // Shuffle in place, then write the result as FULL_INPUT_LIST below outputStep2Dir.
+   Collections.shuffle(lines, new Random(421439783L)); // constant seed for reproducibility
+
+   FSDataOutputStream out = fs.create(new Path(outputStep2Dir, FULL_INPUT_LIST));
+   Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
+   try {
+     for (String line : lines) {
+       writer.write(line + "\n");
+     }
+   } finally {
+     writer.close();
+   }
+ }
+
+ /**
+  * Builds a small Mapper & Reducer preprocessing job that randomizes the order
+  * of the lines in fullInputList, so that load is spread uniformly across all
+  * mappers. Each input line ends up on a random position in the output file
+  * list. Each mapper then indexes a disjoint consecutive set of files such
+  * that each set has roughly the same size, at least from a probabilistic
+  * perspective.
+  *
+  * For example, this input list of URLs:
+  *
+  * A
+  * B
+  * C
+  * D
+  *
+  * might be randomized into this output list of URLs:
+  *
+  * C
+  * A
+  * D
+  * B
+  *
+  * The implementation sorts the list of lines by randomly generated numbers.
+  */
+ private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit)
+     throws IOException {
+   // Only configures the job; the caller is responsible for running it.
+   Job randomizer = Job.getInstance(baseConfig);
+   randomizer.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class));
+   randomizer.setJarByClass(getClass());
+   randomizer.setMapperClass(LineRandomizerMapper.class);
+   randomizer.setReducerClass(LineRandomizerReducer.class);
+   randomizer.setNumReduceTasks(1);
+   randomizer.setInputFormatClass(NLineInputFormat.class);
+   NLineInputFormat.setNumLinesPerSplit(randomizer, numLinesPerSplit);
+   NLineInputFormat.addInputPath(randomizer, fullInputList);
+   randomizer.setOutputFormatClass(TextOutputFormat.class);
+   randomizer.setOutputKeyClass(LongWritable.class);
+   randomizer.setOutputValueClass(Text.class);
+   FileOutputFormat.setOutputPath(randomizer, outputStep2Dir);
+   return randomizer;
+ }
+
+ // Equivalent to the user having typed 'hadoop ... --files <file>'
+ private void addDistributedCacheFile(File file, Configuration conf) throws IOException {
+   final String key = "tmpfiles"; // see Hadoop's GenericOptionsParser
+   String existing = conf.get(key, "");
+   // Run the parser against a copy of conf and read back the "tmpfiles" value it produced
+   String[] args = new String[] { "--files", file.getCanonicalPath() };
+   GenericOptionsParser parser = new GenericOptionsParser(new Configuration(conf), args);
+   String added = parser.getConfiguration().get(key);
+   assert added != null;
+   assert added.length() > 0;
+   StringBuilder merged = new StringBuilder(existing);
+   if (merged.length() > 0) { // already present?
+     merged.append(',');
+   }
+   conf.set(key, merged.append(added).toString());
+ }
+
+ private MorphlineMapRunner setupMorphline(Options options) throws IOException, URISyntaxException {
+   // Registers the morphline file (and the optional morphline id) with the job. Returns null unless
+   // options.isDryRun is set, in which case a runner that uses a DryRunDocumentLoader is returned.
+   if (options.morphlineId != null) {
+     job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_ID_PARAM, options.morphlineId);
+   }
+   addDistributedCacheFile(options.morphlineFile, job.getConfiguration());
+   if (!options.isDryRun) {
+     return null;
+   }
+
+   /*
+    * Ensure scripting support for Java via morphline "java" command works even in dryRun mode,
+    * i.e. when executed in the client side driver JVM. To do so, collect all classpath URLs from
+    * the class loaders chain that org.apache.hadoop.util.RunJar (hadoop jar xyz-job.jar) and
+    * org.apache.hadoop.util.GenericOptionsParser (--libjars) have installed, then tell
+    * FastJavaScriptEngine.parse() where to find classes that JavaBuilder scripts might depend on.
+    * Otherwise scripts that reference external java classes fail to compile with errors like:
+    * ... caused by compilation failed: mfm:///MyJavaClass1.java:2: package
+    * com.cloudera.cdk.morphline.api does not exist
+    */
+   LOG.trace("dryRun: java.class.path: {}", System.getProperty("java.class.path"));
+   String fullClassPath = "";
+   ClassLoader loader = Thread.currentThread().getContextClassLoader(); // see org.apache.hadoop.util.RunJar
+   while (loader != null) { // walk class loaders, collect all classpath URLs
+     if (loader instanceof URLClassLoader) {
+       URL[] classPathPartURLs = ((URLClassLoader) loader).getURLs(); // see org.apache.hadoop.util.RunJar
+       LOG.trace("dryRun: classPathPartURLs: {}", Arrays.asList(classPathPartURLs));
+       StringBuilder classPathParts = new StringBuilder();
+       for (URL url : classPathPartURLs) {
+         File file = new File(url.toURI());
+         if (classPathParts.length() > 0) { // separator only between entries, never in front of the first
+           classPathParts.append(File.pathSeparator);
+         }
+         classPathParts.append(file.getPath());
+       }
+       LOG.trace("dryRun: classPathParts: {}", classPathParts);
+       String separator = File.pathSeparator;
+       if (fullClassPath.length() == 0 || classPathParts.length() == 0) {
+         separator = ""; // nothing to join yet
+       }
+       fullClassPath = classPathParts + separator + fullClassPath; // parents end up in front of children
+     }
+     loader = loader.getParent();
+   }
+
+   // tell FastJavaScriptEngine.parse() where to find the classes that the script might depend on
+   if (fullClassPath.length() > 0) {
+     assert System.getProperty("java.class.path") != null;
+     fullClassPath = System.getProperty("java.class.path") + File.pathSeparator + fullClassPath;
+     LOG.trace("dryRun: fullClassPath: {}", fullClassPath);
+     System.setProperty("java.class.path", fullClassPath); // see FastJavaScriptEngine.parse()
+   }
+
+   job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getPath());
+   return new MorphlineMapRunner(
+       job.getConfiguration(), new DryRunDocumentLoader(), options.solrHomeDir.getPath());
+ }
+
+ /*
+  * Feeds each line of the input list through the morphline inside the current process, i.e.
+  * without submitting a job to MR, for quicker turnaround during trial & debug sessions
+  */
+ private void dryRun(MorphlineMapRunner runner, FileSystem fs, Path fullInputList) throws IOException {
+   BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8"));
+   try {
+     for (String path = in.readLine(); path != null; path = in.readLine()) {
+       runner.map(path, job.getConfiguration(), null);
+     }
+     // only reached when every map() call returned normally
+     runner.cleanup();
+   } finally {
+     in.close();
+   }
+ }
+
+ private int createTreeMergeInputDirList(Path outputReduceDir, FileSystem fs, Path fullInputList)
+     throws FileNotFoundException, IOException {
+   // Writes each shard's "data/index" directory, one per line, to fullInputList; returns the line count.
+   FileStatus[] shardDirs = listSortedOutputShardDirs(outputReduceDir, fs);
+   FSDataOutputStream out = fs.create(fullInputList);
+   try {
+     Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
+     for (int i = 0; i < shardDirs.length; i++) {
+       Path shardDir = shardDirs[i].getPath();
+       LOG.debug("Adding path {}", shardDir);
+       Path indexDir = new Path(shardDir, "data/index");
+       if (!fs.isDirectory(indexDir)) {
+         throw new IllegalStateException("Not a directory: " + indexDir);
+       }
+       writer.write(indexDir.toString() + "\n");
+     }
+     writer.close();
+   } finally {
+     out.close();
+   }
+   // every listed shard dir produced exactly one line
+   return shardDirs.length;
+ }
+
+ private FileStatus[] listSortedOutputShardDirs(Path outputReduceDir, FileSystem fs) throws FileNotFoundException, IOException {
+   // Returns the entries of outputReduceDir whose name starts with the job's output name, sorted.
+   final String prefix = SolrOutputFormat.getOutputName(job);
+   PathFilter byPrefix = new PathFilter() {
+     @Override
+     public boolean accept(Path candidate) {
+       return candidate.getName().startsWith(prefix);
+     }
+   };
+   FileStatus[] shardDirs = fs.listStatus(outputReduceDir, byPrefix);
+   for (int i = 0; i < shardDirs.length; i++) {
+     if (!shardDirs[i].isDirectory()) {
+       throw new IllegalStateException("Not a directory: " + shardDirs[i].getPath());
+     }
+   }
+   Arrays.sort(shardDirs); // FIXME: handle more than 99999 shards (need numeric sort rather than lexicographical sort)
+   return shardDirs;
+ }
+
+ private static void verifyGoLiveArgs(Options opts, ArgumentParser parser) throws ArgumentParserException {
+   final boolean hasZkHost = opts.zkHost != null;
+   if (!hasZkHost && opts.solrHomeDir == null) {
+     throw new ArgumentParserException("At least one of --zk-host or --solr-home-dir is required", parser);
+   }
+   if (opts.goLive && !hasZkHost && opts.shardUrls == null) {
+     throw new ArgumentParserException("--go-live requires that you also pass --shard-url or --zk-host", parser);
+   }
+
+   if (hasZkHost && opts.collection == null) {
+     throw new ArgumentParserException("--zk-host requires that you also pass --collection", parser);
+   }
+
+   if (hasZkHost) {
+     // verify structure of ZK directory later, to avoid checking run-time errors during parsing.
+     return;
+   }
+
+   // Without --zk-host the shard count comes from --shard-url when given, otherwise from --shards.
+   if (opts.shardUrls != null) {
+     if (opts.shardUrls.size() == 0) {
+       throw new ArgumentParserException("--shard-url requires at least one URL", parser);
+     }
+     opts.shards = opts.shardUrls.size();
+   } else if (opts.shards == null) {
+     throw new ArgumentParserException("You must specify one of the following (mutually exclusive) arguments: "
+         + "--zk-host or --shard-url or --shards", parser);
+   } else if (opts.shards <= 0) {
+     throw new ArgumentParserException("--shards must be a positive number: " + opts.shards, parser);
+   }
+
+   // Every path that reaches this point has a positive shard count.
+   assert opts.shards != null;
+   assert opts.shards > 0;
+ }
+
+ private static void verifyZKStructure(Options opts, ArgumentParser parser) throws ArgumentParserException {
+   if (opts.zkHost == null) {
+     return; // nothing to look up when no ZooKeeper host was given
+   }
+   assert opts.collection != null;
+   ZooKeeperInspector inspector = new ZooKeeperInspector();
+   try {
+     opts.shardUrls = inspector.extractShardUrls(opts.zkHost, opts.collection);
+   } catch (Exception e) {
+     LOG.debug("Cannot extract SolrCloud shard URLs from ZooKeeper", e);
+     throw new ArgumentParserException(e, parser);
+   }
+   assert opts.shardUrls != null;
+   if (opts.shardUrls.size() == 0) {
+     throw new ArgumentParserException("--zk-host requires ZooKeeper " + opts.zkHost + " to contain at least one SolrCore for collection: " + opts.collection, parser);
+   }
+   opts.shards = opts.shardUrls.size(); // the shard count is taken from the number of shard URLs found
+   LOG.debug("Using SolrCloud shard URLs: {}", opts.shardUrls);
+ }
+
+ private boolean waitForCompletion(Job job, boolean isVerbose)
+     throws IOException, InterruptedException, ClassNotFoundException {
+   // Blocks until the job finishes; an unsuccessful job is logged and reported via the return value.
+   LOG.debug("Running job: " + getJobInfo(job));
+   if (job.waitForCompletion(isVerbose)) {
+     return true;
+   }
+   LOG.error("Job failed! " + getJobInfo(job));
+   return false;
+ }
+
+ private void goodbye(Job job, long startTime) {
+   long elapsedMillis = System.currentTimeMillis() - startTime; // measured before anything is logged
+   if (job != null) {
+     LOG.info("Succeeded with job: " + getJobInfo(job));
+   }
+   LOG.info("Success. Done. Program took {} secs. Goodbye.", elapsedMillis / 1000.0f);
+ }
+
+ private String getJobInfo(Job job) {
+   return new StringBuilder("jobName: ").append(job.getJobName()).append(", jobId: ").append(job.getJobID()).toString();
+ }
+
+ private boolean rename(Path src, Path dst, FileSystem fs) throws IOException {
+   if (fs.rename(src, dst)) {
+     return true;
+   }
+   LOG.error("Cannot rename " + src + " to " + dst); // a false result is logged, not thrown
+   return false;
+ }
+
+ private boolean delete(Path path, boolean recursive, FileSystem fs) throws IOException {
+   if (fs.delete(path, recursive)) {
+     return true;
+   }
+   LOG.error("Cannot delete " + path); // a false result is logged, not thrown
+   return false;
+ }
+
+ // same as LongMath.divide(p, q, RoundingMode.CEILING); throws ArithmeticException if q == 0
+ private long ceilDivide(long p, long q) {
+   long result = p / q; // Java truncates toward zero, which already is the ceiling of a negative quotient
+   if (p % q != 0 && (p ^ q) >= 0) { // inexact and same signs: the exact quotient is positive, so round up
+     result++;
+   }
+   return result;
+ }
+
+ /** Returns the logarithm of {@code value} to the given {@code base}. */
+ private double log(double base, double value) {
+   double lnValue = Math.log(value);
+   double lnBase = Math.log(base);
+   return lnValue / lnBase;
+ }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java
new file mode 100644
index 00000000000..770a2f9f90b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import net.sourceforge.argparse4j.inf.Argument;
+import net.sourceforge.argparse4j.inf.ArgumentParser;
+import net.sourceforge.argparse4j.inf.ArgumentParserException;
+import net.sourceforge.argparse4j.inf.ArgumentType;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+
+/**
+ * argparse4j ArgumentType that turns a value into a Hadoop Path and runs the checks enabled via the fluent verifyXXX() methods.
+ */
+public class PathArgumentType implements ArgumentType<Path> {
+
+  private final Configuration conf;
+  private FileSystem fs; // assigned by convert() from the path being converted; read by the verify helpers
+  private boolean acceptSystemIn = false;
+  private boolean verifyExists = false;
+  private boolean verifyNotExists = false;
+  private boolean verifyIsFile = false;
+  private boolean verifyIsDirectory = false;
+  private boolean verifyCanRead = false;
+  private boolean verifyCanWrite = false;
+  private boolean verifyCanWriteParent = false;
+  private boolean verifyCanExecute = false;
+  private boolean verifyIsAbsolute = false;
+  private boolean verifyHasScheme = false;
+  private String verifyScheme = null; // null means the scheme is not checked
+
+  public PathArgumentType(Configuration conf) {
+    this.conf = conf;
+  }
+
+  public PathArgumentType acceptSystemIn() {
+    acceptSystemIn = true; // the value "-" is then exempt from every check, see isSystemIn()
+    return this;
+  }
+
+  public PathArgumentType verifyExists() {
+    verifyExists = true;
+    return this;
+  }
+
+  public PathArgumentType verifyNotExists() {
+    verifyNotExists = true;
+    return this;
+  }
+
+  public PathArgumentType verifyIsFile() {
+    verifyIsFile = true;
+    return this;
+  }
+
+  public PathArgumentType verifyIsDirectory() {
+    verifyIsDirectory = true;
+    return this;
+  }
+
+  public PathArgumentType verifyCanRead() {
+    verifyCanRead = true;
+    return this;
+  }
+
+  public PathArgumentType verifyCanWrite() {
+    verifyCanWrite = true;
+    return this;
+  }
+
+  public PathArgumentType verifyCanWriteParent() {
+    verifyCanWriteParent = true;
+    return this;
+  }
+
+  public PathArgumentType verifyCanExecute() {
+    verifyCanExecute = true;
+    return this;
+  }
+
+  public PathArgumentType verifyIsAbsolute() {
+    verifyIsAbsolute = true;
+    return this;
+  }
+
+  public PathArgumentType verifyHasScheme() {
+    verifyHasScheme = true;
+    return this;
+  }
+
+  public PathArgumentType verifyScheme(String scheme) {
+    verifyScheme = scheme;
+    return this;
+  }
+
+  @Override
+  public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException {
+    Path file = new Path(value);
+    try {
+      fs = file.getFileSystem(conf);
+      if (verifyHasScheme && !isSystemIn(file)) {
+        verifyHasScheme(parser, file);
+      }
+      if (verifyScheme != null && !isSystemIn(file)) {
+        verifyScheme(parser, file);
+      }
+      if (verifyIsAbsolute && !isSystemIn(file)) {
+        verifyIsAbsolute(parser, file);
+      }
+      if (verifyExists && !isSystemIn(file)) {
+        verifyExists(parser, file);
+      }
+      if (verifyNotExists && !isSystemIn(file)) {
+        verifyNotExists(parser, file);
+      }
+      if (verifyIsFile && !isSystemIn(file)) {
+        verifyIsFile(parser, file);
+      }
+      if (verifyIsDirectory && !isSystemIn(file)) {
+        verifyIsDirectory(parser, file);
+      }
+      if (verifyCanRead && !isSystemIn(file)) {
+        verifyCanRead(parser, file);
+      }
+      if (verifyCanWrite && !isSystemIn(file)) {
+        verifyCanWrite(parser, file);
+      }
+      if (verifyCanWriteParent && !isSystemIn(file)) {
+        verifyCanWriteParent(parser, file);
+      }
+      if (verifyCanExecute && !isSystemIn(file)) {
+        verifyCanExecute(parser, file);
+      }
+    } catch (IOException e) {
+      throw new ArgumentParserException(e, parser); // file system failures surface as argument errors
+    }
+    return file;
+  }
+
+  private void verifyExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    if (!fs.exists(file)) {
+      throw new ArgumentParserException("File not found: " + file, parser);
+    }
+  }
+
+  private void verifyNotExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    if (fs.exists(file)) {
+      throw new ArgumentParserException("File found: " + file, parser);
+    }
+  }
+
+  private void verifyIsFile(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    if (!fs.isFile(file)) {
+      throw new ArgumentParserException("Not a file: " + file, parser);
+    }
+  }
+
+  private void verifyIsDirectory(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    if (!fs.isDirectory(file)) {
+      throw new ArgumentParserException("Not a directory: " + file, parser);
+    }
+  }
+
+  private void verifyCanRead(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    verifyExists(parser, file);
+    if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.READ)) { // user bits only
+      throw new ArgumentParserException("Insufficient permissions to read file: " + file, parser);
+    }
+  }
+
+  private void verifyCanWrite(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    verifyExists(parser, file);
+    if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.WRITE)) { // user bits only
+      throw new ArgumentParserException("Insufficient permissions to write file: " + file, parser);
+    }
+  }
+
+  private void verifyCanWriteParent(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    Path parent = file.getParent();
+    if (parent == null || !fs.exists(parent) || !fs.getFileStatus(parent).getPermission().getUserAction().implies(FsAction.WRITE)) {
+      throw new ArgumentParserException("Cannot write parent of file: " + file, parser);
+    }
+  }
+
+  private void verifyCanExecute(ArgumentParser parser, Path file) throws ArgumentParserException, IOException {
+    verifyExists(parser, file);
+    if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.EXECUTE)) { // user bits only
+      throw new ArgumentParserException("Insufficient permissions to execute file: " + file, parser);
+    }
+  }
+
+  private void verifyIsAbsolute(ArgumentParser parser, Path file) throws ArgumentParserException {
+    if (!file.isAbsolute()) {
+      throw new ArgumentParserException("Not an absolute file: " + file, parser);
+    }
+  }
+
+  private void verifyHasScheme(ArgumentParser parser, Path file) throws ArgumentParserException {
+    if (file.toUri().getScheme() == null) {
+      throw new ArgumentParserException("URI scheme is missing in path: " + file, parser);
+    }
+  }
+
+  private void verifyScheme(ArgumentParser parser, Path file) throws ArgumentParserException {
+    if (!verifyScheme.equals(file.toUri().getScheme())) {
+      throw new ArgumentParserException("Scheme of path: " + file + " must be: " + verifyScheme, parser);
+    }
+  }
+
+  private boolean isSystemIn(Path file) {
+    return acceptSystemIn && file.toString().equals("-"); // "-" only counts when acceptSystemIn() was called
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java
new file mode 100644
index 00000000000..690901b4c76
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+
+/**
+ * Extracts various components of an HDFS Path
+ */
+public final class PathParts {
+
+  private final String uploadURL;     // the URL exactly as handed to the constructor
+  private final Configuration conf;
+  private final FileSystem fs;        // obtained from the normalized URI and conf
+  private final Path normalizedPath;  // the normalized URI qualified against fs
+  private FileStatus stats;           // filled lazily by getFileStatus()
+
+  /**
+   * Resolves the file system for the given URL and validates the resulting
+   * qualified path.
+   *
+   * @param uploadURL the URL to take apart; must not be null
+   * @param conf the configuration used to obtain the file system; must not be null
+   * @throws IllegalArgumentException if an argument is null, or if the
+   *           qualified path is not absolute or has no scheme, host or
+   *           non-negative port
+   * @throws IOException declared because obtaining the file system may fail
+   */
+  public PathParts(String uploadURL, Configuration conf) throws IOException {
+    require(uploadURL != null, "Path must not be null: " + uploadURL);
+    this.uploadURL = uploadURL;
+    require(conf != null, "Configuration must not be null: " + uploadURL);
+    this.conf = conf;
+
+    final URI normalized = stringToUri(uploadURL);
+    this.fs = FileSystem.get(normalized, conf);
+    require(fs != null, "File system must not be null: " + uploadURL);
+
+    this.normalizedPath = fs.makeQualified(new Path(normalized));
+    require(normalizedPath.isAbsolute(), "Path must be absolute: " + uploadURL);
+    require(getScheme() != null, "Scheme must not be null: " + uploadURL);
+    require(getHost() != null, "Host must not be null: " + uploadURL);
+    require(getPort() >= 0, "Port must not be negative: " + uploadURL);
+  }
+
+  /** Returns the URL string originally supplied to the constructor. */
+  public String getUploadURL() {
+    return uploadURL;
+  }
+
+  /** Returns a new {@link Path} built from the original, unnormalized URL. */
+  public Path getUploadPath() {
+    return new Path(getUploadURL());
+  }
+
+  /** Returns the path component of the qualified URI. */
+  public String getURIPath() {
+    return normalizedPath.toUri().getPath();
+  }
+
+  /** Returns the name of the qualified path, as reported by {@link Path#getName()}. */
+  public String getName() {
+    return normalizedPath.getName();
+  }
+
+  /** Returns the scheme of the qualified URI. */
+  public String getScheme() {
+    return normalizedPath.toUri().getScheme();
+  }
+
+  /** Returns the host of the qualified URI. */
+  public String getHost() {
+    return normalizedPath.toUri().getHost();
+  }
+
+  /**
+   * Returns the port of the qualified URI. When the URI carries none (-1), the
+   * port of the file system's working directory is used, and when that is
+   * missing too, {@code NameNode.DEFAULT_PORT}.
+   */
+  public int getPort() {
+    final int explicit = normalizedPath.toUri().getPort();
+    if (explicit != -1) {
+      return explicit;
+    }
+    final int inherited = fs.getWorkingDirectory().toUri().getPort();
+    return inherited != -1 ? inherited : NameNode.DEFAULT_PORT;
+  }
+
+  /** Returns {@code scheme://host:port} followed by the URI path. */
+  public String getId() {
+    return getScheme() + "://" + getHost() + ":" + getPort() + getURIPath();
+  }
+
+  /** Returns the same string as {@link #getId()}. */
+  public String getDownloadURL() {
+    return getId();
+  }
+
+  /** Returns the configuration supplied to the constructor. */
+  public Configuration getConfiguration() {
+    return conf;
+  }
+
+  /** Returns the file system obtained in the constructor. */
+  public FileSystem getFileSystem() {
+    return fs;
+  }
+
+  /** Returns the status of the upload path; fetched on first call and cached afterwards. */
+  public FileStatus getFileStatus() throws IOException {
+    if (stats == null) {
+      stats = getFileSystem().getFileStatus(getUploadPath());
+    }
+    return stats;
+  }
+
+  /**
+   * Parses the string as a URI and normalizes it. An earlier variant, kept
+   * here for reference, was {@code new Path(pathString).toUri().normalize()}.
+   */
+  private URI stringToUri(String pathString) {
+    return URI.create(pathString).normalize();
+  }
+
+  /** Throws IllegalArgumentException carrying {@code message} unless {@code ok} is true. */
+  private static void require(boolean ok, String message) {
+    if (!ok) {
+      throw new IllegalArgumentException(message);
+    }
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java
new file mode 100644
index 00000000000..27f532c174a
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.Hash;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * MapReduce partitioner that partitions the Mapper output such that each
+ * SolrInputDocument gets sent to the SolrCloud shard that it would have been
+ * sent to if the document were ingested via the standard SolrCloud Near Real
+ * Time (NRT) API.
+ *
+ * In other words, this class implements the same partitioning semantics as the
+ * standard SolrCloud NRT API. This makes it possible to mix batch updates from
+ * MapReduce ingestion with updates from standard NRT ingestion on the same
+ * SolrCloud cluster, using identical unique document keys.
+ */
+public class SolrCloudPartitioner extends Partitioner implements Configurable {
+
+ private Configuration conf;
+ private DocCollection docCollection;
+ private Map shardNumbers;
+ private int shards = 0;
+ private final SolrParams emptySolrParams = new MapSolrParams(Collections.EMPTY_MAP);
+
+ public static final String SHARDS = SolrCloudPartitioner.class.getName() + ".shards";
+ public static final String ZKHOST = SolrCloudPartitioner.class.getName() + ".zkHost";
+ public static final String COLLECTION = SolrCloudPartitioner.class.getName() + ".collection";
+
+ private static final Logger LOG = LoggerFactory.getLogger(SolrCloudPartitioner.class);
+
+ public SolrCloudPartitioner() {}
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ this.shards = conf.getInt(SHARDS, -1);
+ if (shards <= 0) {
+ throw new IllegalArgumentException("Illegal shards: " + shards);
+ }
+ String zkHost = conf.get(ZKHOST);
+ if (zkHost == null) {
+ throw new IllegalArgumentException("zkHost must not be null");
+ }
+ String collection = conf.get(COLLECTION);
+ if (collection == null) {
+ throw new IllegalArgumentException("collection must not be null");
+ }
+ LOG.info("Using SolrCloud zkHost: {}, collection: {}", zkHost, collection);
+ docCollection = new ZooKeeperInspector().extractDocCollection(zkHost, collection);
+ if (docCollection == null) {
+ throw new IllegalArgumentException("docCollection must not be null");
+ }
+ if (docCollection.getSlicesMap().size() != shards) {
+ throw new IllegalArgumentException("Incompatible shards: + " + shards + " for docCollection: " + docCollection);
+ }
+ List slices = new ZooKeeperInspector().getSortedSlices(docCollection.getSlices());
+ if (slices.size() != shards) {
+ throw new IllegalStateException("Incompatible sorted shards: + " + shards + " for docCollection: " + docCollection);
+ }
+ shardNumbers = new HashMap(10 * slices.size()); // sparse for performance
+ for (int i = 0; i < slices.size(); i++) {
+ shardNumbers.put(slices.get(i).getName(), i);
+ }
+ LOG.debug("Using SolrCloud docCollection: {}", docCollection);
+ DocRouter docRouter = docCollection.getRouter();
+ if (docRouter == null) {
+ throw new IllegalArgumentException("docRouter must not be null");
+ }
+ LOG.info("Using SolrCloud docRouterClass: {}", docRouter.getClass());
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public int getPartition(Text key, SolrInputDocumentWritable value, int numPartitions) {
+ DocRouter docRouter = docCollection.getRouter();
+ SolrInputDocument doc = value.getSolrInputDocument();
+ String keyStr = key.toString();
+
+ // TODO: scalability: replace linear search in HashBasedRouter.hashToSlice() with binary search on sorted hash ranges
+ Slice slice = docRouter.getTargetSlice(keyStr, doc, emptySolrParams, docCollection);
+
+// LOG.info("slice: {}", slice);
+ if (slice == null) {
+ throw new IllegalStateException("No matching slice found! The slice seems unavailable. docRouterClass: "
+ + docRouter.getClass().getName());
+ }
+ int rootShard = shardNumbers.get(slice.getName());
+ if (rootShard < 0 || rootShard >= shards) {
+ throw new IllegalStateException("Illegal shard number " + rootShard + " for slice: " + slice + ", docCollection: "
+ + docCollection);
+ }
+
+ // map doc to micro shard aka leaf shard, akin to HashBasedRouter.sliceHash()
+ // taking into account mtree merge algorithm
+ assert numPartitions % shards == 0; // Also note that numPartitions is equal to the number of reducers
+ int hashCode = Hash.murmurhash3_x86_32(keyStr, 0, keyStr.length(), 0);
+ int offset = (hashCode & Integer.MAX_VALUE) % (numPartitions / shards);
+ int microShard = (rootShard * (numPartitions / shards)) + offset;
+// LOG.info("Subpartitions rootShard: {}, offset: {}", rootShard, offset);
+// LOG.info("Partitioned to p: {} for numPartitions: {}, shards: {}, key: {}, value: {}", microShard, numPartitions, shards, key, value);
+
+ assert microShard >= 0 && microShard < numPartitions;
+ return microShard;
+ }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java
new file mode 100644
index 00000000000..88e9acb57cc
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+public enum SolrCounters {
+
+  DOCUMENTS_WRITTEN(
+      Utils.getShortClassName(SolrReducer.class) + ": Number of documents processed"),
+
+  BATCHES_WRITTEN(
+      Utils.getShortClassName(SolrReducer.class) + ": Number of document batches processed"),
+
+  BATCH_WRITE_TIME(
+      Utils.getShortClassName(SolrReducer.class) + ": Time spent by reducers writing batches [ms]"),
+
+  PHYSICAL_REDUCER_MERGE_TIME(
+      Utils.getShortClassName(SolrReducer.class) + ": Time spent by reducers on physical merges [ms]"),
+
+  LOGICAL_TREE_MERGE_TIME(
+      Utils.getShortClassName(TreeMergeMapper.class) + ": Time spent on logical tree merges [ms]"),
+
+  PHYSICAL_TREE_MERGE_TIME(
+      Utils.getShortClassName(TreeMergeMapper.class) + ": Time spent on physical tree merges [ms]");
+
+  /**
+   * Display text of the counter: the result of {@code Utils.getShortClassName}
+   * for the associated class, followed by a colon and a description.
+   */
+  private final String label;
+
+  SolrCounters(String label) {
+    this.label = label;
+  }
+
+  /** Returns the display label instead of the enum constant name. */
+  @Override
+  public String toString() {
+    return this.label;
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java
new file mode 100644
index 00000000000..e043f7a0ed2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.FastOutputStream;
+import org.apache.solr.common.util.JavaBinCodec;
+
+public class SolrInputDocumentWritable implements Writable {
+  /** The wrapped document; null after the no-arg constructor until readFields runs. */
+  private SolrInputDocument sid;
+
+  /** Creates an empty wrapper whose document is populated later by {@link #readFields(DataInput)}. */
+  public SolrInputDocumentWritable() {
+  }
+
+  /** Wraps the given document. */
+  public SolrInputDocumentWritable(SolrInputDocument sid) {
+    this.sid = sid;
+  }
+
+  /** Returns the wrapped document, which may be null if none has been set or read yet. */
+  public SolrInputDocument getSolrInputDocument() {
+    return sid;
+  }
+
+  /**
+   * Returns the string form of the wrapped document, or {@code "null"} when no
+   * document is present.
+   */
+  @Override
+  public String toString() {
+    // The no-arg constructor leaves sid null, so it must not be dereferenced directly.
+    return String.valueOf(sid);
+  }
+
+  /**
+   * Writes the wrapped document to {@code out} using {@link JavaBinCodec}.
+   * The buffered stream is flushed even if encoding fails.
+   */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    JavaBinCodec codec = new JavaBinCodec();
+    FastOutputStream daos = FastOutputStream.wrap(DataOutputOutputStream.constructOutputStream(out));
+    codec.init(daos);
+    try {
+      codec.writeVal(sid);
+    } finally {
+      daos.flushBuffer();
+    }
+  }
+
+  /** Replaces the wrapped document with one decoded from {@code in} by {@link JavaBinCodec}. */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    JavaBinCodec codec = new JavaBinCodec();
+    UnbufferedDataInputInputStream dis = new UnbufferedDataInputInputStream(in);
+    sid = (SolrInputDocument)codec.readVal(dis);
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java
new file mode 100644
index 00000000000..2a6d699b541
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+// NOTE(review): Mapper is used without type arguments here although Text is
+// imported by this file; the type parameters may have been lost - confirm.
+public class SolrMapper extends Mapper {
+
+  // Assigned once in setup(); null until then.
+  private Path solrHomeDir;
+
+  /**
+   * Calls {@code Utils.getLogConfigFile} with the job configuration, runs the
+   * inherited setup, and then stores the path returned by
+   * {@code SolrRecordWriter.findSolrConfig}.
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    Utils.getLogConfigFile(context.getConfiguration());
+    super.setup(context);
+    this.solrHomeDir = SolrRecordWriter.findSolrConfig(context.getConfiguration());
+  }
+
+  /** Returns the path stored by {@link #setup}; null if setup has not run. */
+  protected Path getSolrHomeDir() {
+    return this.solrHomeDir;
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java
new file mode 100644
index 00000000000..97b2b79404e
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java
@@ -0,0 +1,278 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.UUID;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SolrOutputFormat extends FileOutputFormat {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SolrOutputFormat.class);
+
+  /**
+   * The parameter used to pass the solr config zip file information. This will
+   * be the hdfs path to the configuration zip file
+   */
+  public static final String SETUP_OK = "solr.output.format.setup";
+
+  /** The key used to pass the zip file name through the configuration. */
+  public static final String ZIP_NAME = "solr.zip.name";
+
+  /**
+   * The base name of the zip file containing the configuration information.
+   * This file is passed via the distributed cache using a unique name, obtained
+   * via {@link #getZipName(Configuration jobConf)}.
+   */
+  public static final String ZIP_FILE_BASE_NAME = "solr.zip";
+
+  /**
+   * The key used to pass the boolean configuration parameter that instructs for
+   * regular or zip file output
+   */
+  public static final String OUTPUT_ZIP_FILE = "solr.output.zip.format";
+
+  /** Value returned by {@link #getSolrWriterThreadCount} when the key is not set. */
+  static int defaultSolrWriterThreadCount = 0;
+
+  /** The key holding the number of threads used for index writing. */
+  public static final String SOLR_WRITER_THREAD_COUNT = "solr.record.writer.num.threads";
+
+  /** Value returned by {@link #getSolrWriterQueueSize} when the key is not set. */
+  static int defaultSolrWriterQueueSize = 1;
+
+  /** The key holding the maximum size of the queue of documents pending index writing. */
+  public static final String SOLR_WRITER_QUEUE_SIZE = "solr.record.writer.max.queues.size";
+
+  /** Value returned by {@link #getBatchSize} when the key is not set. */
+  static int defaultSolrBatchSize = 20;
+
+  /** The key holding the record writer batch size. */
+  public static final String SOLR_RECORD_WRITER_BATCH_SIZE = "solr.record.writer.batch.size";
+
+  /** Configuration key declared here for other classes; it is not read within this class. */
+  public static final String SOLR_RECORD_WRITER_MAX_SEGMENTS = "solr.record.writer.maxSegments";
+
+  /** Returns the {@link #SETUP_OK} key name. */
+  public static String getSetupOk() {
+    return SETUP_OK;
+  }
+
+  /** Set the number of threads used for index writing */
+  public static void setSolrWriterThreadCount(int count, Configuration conf) {
+    conf.setInt(SOLR_WRITER_THREAD_COUNT, count);
+  }
+
+  /** Get the number of threads used for index writing */
+  public static int getSolrWriterThreadCount(Configuration conf) {
+    return conf.getInt(SOLR_WRITER_THREAD_COUNT, defaultSolrWriterThreadCount);
+  }
+
+  /**
+   * Set the maximum size of the queue for documents to be written to the
+   * index.
+   */
+  public static void setSolrWriterQueueSize(int count, Configuration conf) {
+    conf.setInt(SOLR_WRITER_QUEUE_SIZE, count);
+  }
+
+  /** Return the maximum size for the number of documents pending index writing. */
+  public static int getSolrWriterQueueSize(Configuration conf) {
+    return conf.getInt(SOLR_WRITER_QUEUE_SIZE, defaultSolrWriterQueueSize);
+  }
+
+  /**
+   * Return the file name portion of the configuration zip file, from the
+   * configuration.
+   */
+  public static String getZipName(Configuration conf) {
+    return conf.get(ZIP_NAME, ZIP_FILE_BASE_NAME);
+  }
+
+  /**
+   * configure the job to output zip files of the output index, or full
+   * directory trees. Zip files are about 1/5th the size of the raw index, and
+   * much faster to write, but take more cpu to create.
+   *
+   * @param output true if should output zip files
+   * @param conf to use
+   */
+  public static void setOutputZipFormat(boolean output, Configuration conf) {
+    conf.setBoolean(OUTPUT_ZIP_FILE, output);
+  }
+
+  /**
+   * return true if the output should be a zip file of the index, rather than
+   * the raw index
+   *
+   * @param conf to use
+   * @return true if output zip files is on
+   */
+  public static boolean isOutputZipFormat(Configuration conf) {
+    return conf.getBoolean(OUTPUT_ZIP_FILE, false);
+  }
+
+  /** Returns the output name that {@link FileOutputFormat#getOutputName} reports for the job. */
+  public static String getOutputName(JobContext job) {
+    return FileOutputFormat.getOutputName(job);
+  }
+
+  /**
+   * Runs the inherited output checks and additionally requires that
+   * {@link #SETUP_OK} has been set in the job configuration.
+   *
+   * @throws IOException if the inherited check fails or {@link #SETUP_OK} is unset
+   */
+  @Override
+  public void checkOutputSpecs(JobContext job) throws IOException {
+    super.checkOutputSpecs(job);
+    if (job.getConfiguration().get(SETUP_OK) == null) {
+      throw new IOException("Solr home cache not set up!");
+    }
+  }
+
+  /**
+   * Creates a {@code SolrRecordWriter} for the task's default work file, using
+   * the batch size from the task configuration.
+   */
+  @Override
+  public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
+    Utils.getLogConfigFile(context.getConfiguration());
+    Path workDir = getDefaultWorkFile(context, "");
+    int batchSize = getBatchSize(context.getConfiguration());
+    return new SolrRecordWriter(context, workDir, batchSize);
+  }
+
+  /** Zips the given solr home directory and registers the zip with the job's distributed cache. */
+  public static void setupSolrHomeCache(File solrHomeDir, Job job) throws IOException{
+    File solrHomeZip = createSolrHomeZip(solrHomeDir);
+    addSolrConfToDistributedCache(job, solrHomeZip);
+  }
+
+  /**
+   * Creates a temporary zip file from the given solr home directory.
+   *
+   * @throws IOException if the directory is null, missing or not a directory,
+   *           or if writing the zip fails
+   */
+  public static File createSolrHomeZip(File solrHomeDir) throws IOException {
+    return createSolrHomeZip(solrHomeDir, false);
+  }
+
+  // The safeToModify flag is accepted but not consulted by this implementation.
+  private static File createSolrHomeZip(File solrHomeDir, boolean safeToModify) throws IOException {
+    if (solrHomeDir == null || !(solrHomeDir.exists() && solrHomeDir.isDirectory())) {
+      throw new IOException("Invalid solr home: " + solrHomeDir);
+    }
+    File solrHomeZip = File.createTempFile("solr", ".zip");
+    createZip(solrHomeDir, solrHomeZip);
+    return solrHomeZip;
+  }
+
+  /**
+   * Copies the local zip file to {@code /tmp} on the job's file system under a
+   * UUID-prefixed name, adds it to the distributed cache as an archive, and
+   * records the zip name ({@link #ZIP_NAME}) and path ({@link #SETUP_OK}) in
+   * the job configuration.
+   */
+  public static void addSolrConfToDistributedCache(Job job, File solrHomeZip)
+      throws IOException {
+    // Make a reasonably unique name for the zip file in the distributed cache
+    // to avoid collisions if multiple jobs are running.
+    String hdfsZipName = UUID.randomUUID().toString() + '.'
+        + ZIP_FILE_BASE_NAME;
+    Configuration jobConf = job.getConfiguration();
+    jobConf.set(ZIP_NAME, hdfsZipName);
+
+    Path zipPath = new Path("/tmp", getZipName(jobConf));
+    FileSystem fs = FileSystem.get(jobConf);
+    fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath);
+    final URI baseZipUrl = fs.getUri().resolve(
+        zipPath.toString() + '#' + getZipName(jobConf));
+
+    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
+    LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives()));
+    LOG.debug("Set zipPath: {}", zipPath);
+    // Actually send the path for the configuration zip file
+    jobConf.set(SETUP_OK, zipPath.toString());
+  }
+
+  /**
+   * Writes a zip file to {@code out} containing every file found under the
+   * allowed configuration directories of {@code dir}, plus a {@code solr.xml}
+   * entry. Entry names are the file paths with the {@code dir} prefix removed.
+   *
+   * @throws IOException if a required configuration directory is missing, a
+   *           directory cannot be listed, or reading or writing fails
+   */
+  private static void createZip(File dir, File out) throws IOException {
+    HashSet<File> files = new HashSet<File>();
+    // take only conf/ and lib/
+    for (String allowedDirectory : SolrRecordWriter
+        .getAllowedConfigDirectories()) {
+      File configDir = new File(dir, allowedDirectory);
+      boolean configDirExists = configDir.exists();
+      // If the directory does not exist, and is required, bail out
+      if (!configDirExists
+          && SolrRecordWriter.isRequiredConfigDirectory(allowedDirectory)) {
+        throw new IOException(String.format(Locale.ENGLISH,
+            "required configuration directory %s is not present in %s",
+            allowedDirectory, dir));
+      }
+      if (!configDirExists) {
+        continue;
+      }
+      // Collect the files of the existing, allowed directory for the zip file
+      listFiles(configDir, files);
+    }
+
+    out.delete();
+    int subst = dir.toString().length();
+    ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(out));
+    try {
+      byte[] buf = new byte[1024];
+      for (File f : files) {
+        ZipEntry ze = new ZipEntry(f.toString().substring(subst));
+        zos.putNextEntry(ze);
+        InputStream is = new FileInputStream(f);
+        try {
+          int cnt;
+          while ((cnt = is.read(buf)) >= 0) {
+            zos.write(buf, 0, cnt);
+          }
+        } finally {
+          // Release the file handle even when copying fails part-way.
+          is.close();
+        }
+        zos.flush();
+        zos.closeEntry();
+      }
+
+      ZipEntry ze = new ZipEntry("solr.xml");
+      zos.putNextEntry(ze);
+      // NOTE(review): the solr.xml entry is written with a single space as its
+      // whole content - confirm this is the intended payload.
+      zos.write(" ".getBytes("UTF-8"));
+      zos.flush();
+      zos.closeEntry();
+    } finally {
+      // Close the zip stream (and the underlying file) on every path.
+      zos.close();
+    }
+  }
+
+  /**
+   * Recursively adds all regular files below {@code dir} to {@code files}; if
+   * {@code dir} is itself a regular file, adds just that file.
+   *
+   * @throws IOException if {@code dir} is not a regular file and its contents
+   *           cannot be listed
+   */
+  private static void listFiles(File dir, Set<File> files) throws IOException {
+    File[] list = dir.listFiles();
+
+    if (list == null) {
+      if (dir.isFile()) {
+        files.add(dir);
+        return;
+      }
+      // File.listFiles() yields null for a non-directory or on an I/O error.
+      throw new IOException("Cannot list directory: " + dir);
+    }
+
+    for (File f : list) {
+      if (f.isFile()) {
+        files.add(f);
+      } else {
+        listFiles(f, files);
+      }
+    }
+  }
+
+  /** Returns the record writer batch size, or the default when the key is not set. */
+  public static int getBatchSize(Configuration jobConf) {
+    return jobConf.getInt(SolrOutputFormat.SOLR_RECORD_WRITER_BATCH_SIZE,
+        defaultSolrBatchSize);
+  }
+
+  /** Sets the record writer batch size. */
+  public static void setBatchSize(int count, Configuration jobConf) {
+    jobConf.setInt(SOLR_RECORD_WRITER_BATCH_SIZE, count);
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java
new file mode 100644
index 00000000000..e589c36313f
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java
@@ -0,0 +1,516 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.solr.hadoop.SolrOutputFormat;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link RecordWriter} that collects the documents it is given into batches
+ * and hands each batch to a {@link BatchWriter} backed by an
+ * {@link EmbeddedSolrServer}.
+ *
+ * @param <K> the key type of the records; keys are not used for indexing
+ * @param <V> the value type of the records; values are cast to
+ *        {@link SolrInputDocumentWritable} when written
+ */
+class SolrRecordWriter<K, V> extends RecordWriter<K, V> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SolrRecordWriter.class);
+
+  /** Names of the entries that may be included in the configuration data. */
+  public final static List<String> allowedConfigDirectories = new ArrayList<String>(
+      Arrays.asList(new String[] { "conf", "lib", "solr.xml" }));
+
+  /** Names of the entries that must be present in the configuration data. */
+  public final static Set<String> requiredConfigDirectories = new HashSet<String>();
+
+  static {
+    requiredConfigDirectories.add("conf");
+  }
+
+ /**
+  * Provides a read-only view of the directory names that may be included in
+  * the configuration data passed to the tasks.
+  *
+  * @return an unmodifiable list of directory names
+  */
+ public static List getAllowedConfigDirectories() {
+   final List readOnlyView = Collections.unmodifiableList(allowedConfigDirectories);
+   return readOnlyView;
+ }
+
+ /**
+  * Tells whether the given directory must be present in the configuration
+  * data set.
+  *
+  * @param directory the directory name to look up
+  * @return true if the name is listed in {@code requiredConfigDirectories}
+  */
+ public static boolean isRequiredConfigDirectory(final String directory) {
+   final boolean required = requiredConfigDirectories.contains(directory);
+   return required;
+ }
+
+ /** The path that the final index will be written to */
+
+ /** The location in a local temporary directory that the index is built in. */
+
+// /**
+// * If true, create a zip file of the completed index in the final storage
+// * location A .zip will be appended to the final output name if it is not
+// * already present.
+// */
+// private boolean outputZipFile = false;
+
+ private final HeartBeater heartBeater;
+ private final BatchWriter batchWriter;
+ /** Documents accumulated since the last batch was handed to the batch writer. */
+ private final List<SolrInputDocument> batch;
+ /** Number of buffered documents that triggers queueing of a batch. */
+ private final int batchSize;
+ private long numDocsWritten = 0;
+ /** Earliest wall-clock time (ms) at which write() logs the next progress line. */
+ private long nextLogTime = System.currentTimeMillis();
+
+ /** Reducer contexts registered through addReducerContext, keyed by task id. */
+ private static HashMap<TaskID, Reducer<?,?,?,?>.Context> contextMap = new HashMap<TaskID, Reducer<?,?,?,?>.Context>();
+
+ public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
+ this.batchSize = batchSize;
+ this.batch = new ArrayList(batchSize);
+ Configuration conf = context.getConfiguration();
+
+ // setLogLevel("org.apache.solr.core", "WARN");
+ // setLogLevel("org.apache.solr.update", "WARN");
+
+ heartBeater = new HeartBeater(context);
+ try {
+ heartBeater.needHeartBeat();
+
+ Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
+ FileSystem fs = outputShardDir.getFileSystem(conf);
+ EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir);
+ batchWriter = new BatchWriter(solr, batchSize,
+ context.getTaskAttemptID().getTaskID(),
+ SolrOutputFormat.getSolrWriterThreadCount(conf),
+ SolrOutputFormat.getSolrWriterQueueSize(conf));
+
+ } catch (Exception e) {
+ throw new IllegalStateException(String.format(Locale.ENGLISH,
+ "Failed to initialize record writer for %s, %s", context.getJobName(), conf
+ .get("mapred.task.id")), e);
+ } finally {
+ heartBeater.cancelHeartBeat();
+ }
+ }
+
+ /**
+  * Creates an embedded Solr server named "core1" that uses
+  * {@code solrHomeDir} as its Solr home and writes its index to the
+  * {@code data} directory below {@code outputShardDir}.
+  *
+  * @param solrHomeDir the Solr home directory; must not be null
+  * @param fs the file system on which the data directory is created
+  * @param outputShardDir the directory that receives the {@code data} directory
+  * @return the embedded server for the newly registered core
+  * @throws IOException if {@code solrHomeDir} is null or the data directory
+  *         cannot be created
+  */
+ public static EmbeddedSolrServer createEmbeddedSolrServer(Path solrHomeDir, FileSystem fs, Path outputShardDir)
+     throws IOException {
+
+   if (solrHomeDir == null) {
+     throw new IOException("Unable to find solr home setting");
+   }
+   LOG.info("Creating embedded Solr server with solrHomeDir: " + solrHomeDir + ", fs: " + fs + ", outputShardDir: " + outputShardDir);
+
+   Properties props = new Properties();
+   // FIXME note this is odd (no scheme) given Solr doesn't currently
+   // support uris (just abs/relative path)
+   Path solrDataDir = new Path(outputShardDir, "data");
+   if (!fs.exists(solrDataDir) && !fs.mkdirs(solrDataDir)) {
+     throw new IOException("Unable to create " + solrDataDir);
+   }
+
+   String dataDirStr = solrDataDir.toUri().toString();
+   props.setProperty("solr.data.dir", dataDirStr);
+   props.setProperty("solr.home", solrHomeDir.toString());
+
+   SolrResourceLoader loader = new SolrResourceLoader(solrHomeDir.toString(),
+       null, props);
+
+   LOG.info(String
+       .format(Locale.ENGLISH,
+           "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to solr.data.dir %s, with permdir %s",
+           solrHomeDir, solrHomeDir.toUri(), loader.getInstanceDir(),
+           loader.getConfigDir(), dataDirStr, outputShardDir));
+
+   // These must be in place before the container and the core are created:
+   // a system property that is assigned only after the core has been loaded
+   // cannot influence how that core was configured.
+   System.setProperty("solr.hdfs.nrtcachingdirectory", "false");
+   System.setProperty("solr.hdfs.blockcache.enabled", "false");
+   System.setProperty("solr.autoCommit.maxTime", "-1");
+   System.setProperty("solr.autoSoftCommit.maxTime", "-1");
+
+   CoreContainer container = new CoreContainer(loader);
+   container.load();
+   CoreDescriptor descr = new CoreDescriptor(container, "core1",
+       ".", props);
+
+   SolrCore core = container.create(descr);
+   container.register(core, false);
+
+   return new EmbeddedSolrServer(container, "core1");
+ }
+
+ public static void incrementCounter(TaskID taskId, String groupName, String counterName, long incr) {
+ Reducer,?,?,?>.Context context = contextMap.get(taskId);
+ if (context != null) {
+ context.getCounter(groupName, counterName).increment(incr);
+ }
+ }
+
+ public static void incrementCounter(TaskID taskId, Enum counterName, long incr) {
+ Reducer,?,?,?>.Context context = contextMap.get(taskId);
+ if (context != null) {
+ context.getCounter(counterName).increment(incr);
+ }
+ }
+
+ /**
+  * Registers a reducer context under the id of its task so that the
+  * incrementCounter methods can find it later.
+  *
+  * @param context the reducer context to register
+  */
+ public static void addReducerContext(Reducer<?,?,?,?>.Context context) {
+   TaskID taskID = context.getTaskAttemptID().getTaskID();
+   contextMap.put(taskID, context);
+ }
+
+ /**
+  * Searches the local distributed-cache archives for the unpacked directory
+  * whose name equals {@code SolrOutputFormat.getZipName(conf)}.
+  *
+  * @param conf the job configuration
+  * @return the matching directory, or null if no archive has that name
+  * @throws IOException if no local cache archives are available
+  */
+ public static Path findSolrConfig(Configuration conf) throws IOException {
+   Path solrHome = null;
+   // FIXME when mrunit supports the new cache apis
+   //URI[] localArchives = context.getCacheArchives();
+   Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
+   // Treat a null result like an empty array so that the caller gets the
+   // descriptive IOException below instead of a NullPointerException.
+   if (localArchives == null || localArchives.length == 0) {
+     throw new IOException(String.format(Locale.ENGLISH,
+         "No local cache archives, where is %s:%s", SolrOutputFormat
+             .getSetupOk(), SolrOutputFormat.getZipName(conf)));
+   }
+   for (Path unpackedDir : localArchives) {
+     // Only logged if debugging
+     if (LOG.isDebugEnabled()) {
+       LOG.debug(String.format(Locale.ENGLISH, "Examining unpack directory %s for %s",
+           unpackedDir, SolrOutputFormat.getZipName(conf)));
+       dumpDirectoryListing(unpackedDir);
+     }
+     if (unpackedDir.getName().equals(SolrOutputFormat.getZipName(conf))) {
+       LOG.info("Using this unpacked directory as solr home: {}", unpackedDir);
+       solrHome = unpackedDir;
+       break;
+     }
+   }
+
+   return solrHome;
+ }
+
+ /**
+  * Debugging aid: writes the output of {@code /bin/ls -lR} for {@code dir} to
+  * System.err. This is best effort; a failure to run the command is logged
+  * at debug level and never aborts the caller.
+  */
+ private static void dumpDirectoryListing(Path dir) {
+   try {
+     ProcessBuilder lsCmd = new ProcessBuilder("/bin/ls", "-lR", dir.toString());
+     // The no-argument redirectErrorStream() only queries the flag; pass true
+     // so that the command's stderr is actually merged into the stream read below.
+     lsCmd.redirectErrorStream(true);
+     Process ls = lsCmd.start();
+     InputStream all = ls.getInputStream();
+     try {
+       byte[] buf = new byte[16 * 1024];
+       int count;
+       while ((count = all.read(buf)) >= 0) {
+         System.err.write(buf, 0, count);
+       }
+     } finally {
+       all.close();
+     }
+     String exitValue;
+     try {
+       exitValue = String.valueOf(ls.waitFor());
+     } catch (InterruptedException e) {
+       // Keep the interrupt visible to the code that called us.
+       Thread.currentThread().interrupt();
+       exitValue = "interrupted";
+     }
+     System.err.format("Exit value of 'ls -lR' is %s%n", exitValue);
+   } catch (IOException e) {
+     LOG.debug("Unable to list the contents of " + dir, e);
+   }
+ }
+
+ /**
+  * Writes one record. The document carried by {@code value} is appended to
+  * the current batch; once {@link #batchSize} documents have been collected
+  * the batch is queued on the batch writer and the buffer is cleared. Progress
+  * is logged when the next log time has been reached. Queueing can take a
+  * substantial amount of time, depending on {@link #batchSize}. If there is
+  * heavy disk contention the writes may take more than the 600 second default
+  * timeout.
+  *
+  * @param key ignored
+  * @param value the record, which must be a {@link SolrInputDocumentWritable}
+  * @throws IOException if queueing the batch fails with a SolrServerException
+  */
+ @Override
+ public void write(K key, V value) throws IOException {
+   heartBeater.needHeartBeat();
+   try {
+     final SolrInputDocumentWritable writable = (SolrInputDocumentWritable) value;
+     batch.add(writable.getSolrInputDocument());
+     if (batch.size() < batchSize) {
+       return; // batch not full yet; the finally block still cancels the heart beat
+     }
+     batchWriter.queueBatch(batch);
+     numDocsWritten += batch.size();
+     if (System.currentTimeMillis() >= nextLogTime) {
+       LOG.info("docsWritten: {}", numDocsWritten);
+       nextLogTime += 10000;
+     }
+     batch.clear();
+   } catch (SolrServerException e) {
+     throw new IOException(e);
+   } finally {
+     heartBeater.cancelHeartBeat();
+   }
+ }
+
+ /**
+  * Queues any documents that are still buffered, closes the batch writer and
+  * shuts down the heart beater.
+  *
+  * @param context the task attempt; may be null, in which case no progress
+  *        or status is reported on it
+  * @throws IOException if flushing or closing fails; failures other than
+  *         IOException and InterruptedException are wrapped in an IOException
+  * @throws InterruptedException if closing is interrupted
+  */
+ @Override
+ public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+   if (context != null) {
+     heartBeater.setProgress(context);
+   }
+   try {
+     heartBeater.needHeartBeat();
+     if (batch.size() > 0) {
+       batchWriter.queueBatch(batch);
+       numDocsWritten += batch.size();
+       batch.clear();
+     }
+     LOG.info("docsWritten: {}", numDocsWritten);
+     batchWriter.close(context);
+//      if (outputZipFile) {
+//        context.setStatus("Writing Zip");
+//        packZipFile(); // Written to the perm location
+//      } else {
+//        context.setStatus("Copying Index");
+//        fs.completeLocalOutput(perm, temp); // copy to dfs
+//      }
+   } catch (Exception e) {
+     if (e instanceof IOException) {
+       throw (IOException) e;
+     }
+     // The method declares InterruptedException, so let it through unchanged
+     // instead of hiding it inside an IOException.
+     if (e instanceof InterruptedException) {
+       throw (InterruptedException) e;
+     }
+     throw new IOException(e);
+   } finally {
+     heartBeater.cancelHeartBeat();
+     heartBeater.close();
+//      File tempFile = new File(temp.toString());
+//      if (tempFile.exists()) {
+//        FileUtils.forceDelete(new File(temp.toString()));
+//      }
+   }
+
+   // A null context is tolerated at the top of this method, so tolerate it here too.
+   if (context != null) {
+     context.setStatus("Done");
+   }
+ }
+
+// private void packZipFile() throws IOException {
+// FSDataOutputStream out = null;
+// ZipOutputStream zos = null;
+// int zipCount = 0;
+// LOG.info("Packing zip file for " + perm);
+// try {
+// out = fs.create(perm, false);
+// zos = new ZipOutputStream(out);
+//
+// String name = perm.getName().replaceAll(".zip$", "");
+// LOG.info("adding index directory" + temp);
+// zipCount = zipDirectory(conf, zos, name, temp.toString(), temp);
+// /**
+// for (String configDir : allowedConfigDirectories) {
+// if (!isRequiredConfigDirectory(configDir)) {
+// continue;
+// }
+// final Path confPath = new Path(solrHome, configDir);
+// LOG.info("adding configdirectory" + confPath);
+//
+// zipCount += zipDirectory(conf, zos, name, solrHome.toString(), confPath);
+// }
+// **/
+// } catch (Throwable ohFoo) {
+// LOG.error("packZipFile exception", ohFoo);
+// if (ohFoo instanceof RuntimeException) {
+// throw (RuntimeException) ohFoo;
+// }
+// if (ohFoo instanceof IOException) {
+// throw (IOException) ohFoo;
+// }
+// throw new IOException(ohFoo);
+//
+// } finally {
+// if (zos != null) {
+// if (zipCount == 0) { // If no entries were written, only close out, as
+// // the zip will throw an error
+// LOG.error("No entries written to zip file " + perm);
+// fs.delete(perm, false);
+// // out.close();
+// } else {
+// LOG.info(String.format("Wrote %d items to %s for %s", zipCount, perm,
+// temp));
+// zos.close();
+// }
+// }
+// }
+// }
+//
+// /**
+// * Write a file to a zip output stream, removing leading path name components
+// * from the actual file name when creating the zip file entry.
+// *
+// * The entry placed in the zip file is {@code baseName}/
+// * {@code relativePath}, where {@code relativePath} is constructed
+// * by removing a leading {@code root} from the path for
+// * {@code itemToZip}.
+// *
+// * If {@code itemToZip} is an empty directory, it is ignored. If
+// * {@code itemToZip} is a directory, the contents of the directory are
+// * added recursively.
+// *
+// * @param zos The zip output stream
+// * @param baseName The base name to use for the file name entry in the zip
+// * file
+// * @param root The path to remove from {@code itemToZip} to make a
+// * relative path name
+// * @param itemToZip The path to the file to be added to the zip file
+// * @return the number of entries added
+// * @throws IOException
+// */
+// static public int zipDirectory(final Configuration conf,
+// final ZipOutputStream zos, final String baseName, final String root,
+// final Path itemToZip) throws IOException {
+// LOG
+// .info(String
+// .format("zipDirectory: %s %s %s", baseName, root, itemToZip));
+// LocalFileSystem localFs = FileSystem.getLocal(conf);
+// int count = 0;
+//
+// final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
+// if (itemStatus.isDirectory()) {
+// final FileStatus[] statai = localFs.listStatus(itemToZip);
+//
+// // Add a directory entry to the zip file
+// final String zipDirName = relativePathForZipEntry(itemToZip.toUri()
+// .getPath(), baseName, root);
+// final ZipEntry dirZipEntry = new ZipEntry(zipDirName
+// + Path.SEPARATOR_CHAR);
+// LOG.info(String.format("Adding directory %s to zip", zipDirName));
+// zos.putNextEntry(dirZipEntry);
+// zos.closeEntry();
+// count++;
+//
+// if (statai == null || statai.length == 0) {
+// LOG.info(String.format("Skipping empty directory %s", itemToZip));
+// return count;
+// }
+// for (FileStatus status : statai) {
+// count += zipDirectory(conf, zos, baseName, root, status.getPath());
+// }
+// LOG.info(String.format("Wrote %d entries for directory %s", count,
+// itemToZip));
+// return count;
+// }
+//
+// final String inZipPath = relativePathForZipEntry(itemToZip.toUri()
+// .getPath(), baseName, root);
+//
+// if (inZipPath.length() == 0) {
+// LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)",
+// itemToZip, root, baseName));
+// return 0;
+// }
+//
+// // Take empty files in case the place holder is needed
+// FSDataInputStream in = null;
+// try {
+// in = localFs.open(itemToZip);
+// final ZipEntry ze = new ZipEntry(inZipPath);
+// ze.setTime(itemStatus.getModificationTime());
+// // Comments confuse looking at the zip file
+// // ze.setComment(itemToZip.toString());
+// zos.putNextEntry(ze);
+//
+// IOUtils.copyBytes(in, zos, conf, false);
+// zos.closeEntry();
+// LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
+// return 1;
+// } finally {
+// in.close();
+// }
+//
+// }
+//
+// static String relativePathForZipEntry(final String rawPath,
+// final String baseName, final String root) {
+// String relativePath = rawPath.replaceFirst(Pattern.quote(root.toString()),
+// "");
+// LOG.info(String.format("RawPath %s, baseName %s, root %s, first %s",
+// rawPath, baseName, root, relativePath));
+//
+// if (relativePath.startsWith(Path.SEPARATOR)) {
+// relativePath = relativePath.substring(1);
+// }
+// LOG.info(String.format(
+// "RawPath %s, baseName %s, root %s, post leading slash %s", rawPath,
+// baseName, root, relativePath));
+// if (relativePath.isEmpty()) {
+// LOG.warn(String.format(
+// "No data after root (%s) removal from raw path %s", root, rawPath));
+// return baseName;
+// }
+// // Construct the path that will be written to the zip file, including
+// // removing any leading '/' characters
+// String inZipPath = baseName + Path.SEPARATOR_CHAR + relativePath;
+//
+// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 1 %s",
+// rawPath, baseName, root, inZipPath));
+// if (inZipPath.startsWith(Path.SEPARATOR)) {
+// inZipPath = inZipPath.substring(1);
+// }
+// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 2 %s",
+// rawPath, baseName, root, inZipPath));
+//
+// return inZipPath;
+//
+// }
+//
+ /*
+ static boolean setLogLevel(String packageName, String level) {
+ Log logger = LogFactory.getLog(packageName);
+ if (logger == null) {
+ return false;
+ }
+ // look for: org.apache.commons.logging.impl.SLF4JLocationAwareLog
+ LOG.warn("logger class:"+logger.getClass().getName());
+ if (logger instanceof Log4JLogger) {
+ process(((Log4JLogger) logger).getLogger(), level);
+ return true;
+ }
+ if (logger instanceof Jdk14Logger) {
+ process(((Jdk14Logger) logger).getLogger(), level);
+ return true;
+ }
+ return false;
+ }
+
+ public static void process(org.apache.log4j.Logger log, String level) {
+ if (level != null) {
+ log.setLevel(org.apache.log4j.Level.toLevel(level));
+ }
+ }
+
+ public static void process(java.util.logging.Logger log, String level) {
+ if (level != null) {
+ log.setLevel(java.util.logging.Level.parse(level));
+ }
+ }
+ */
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java
new file mode 100644
index 00000000000..59f64ee493f
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver;
+import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver;
+import org.apache.solr.hadoop.dedup.UpdateConflictResolver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.cdk.morphline.api.ExceptionHandler;
+import com.cloudera.cdk.morphline.base.FaultTolerance;
+
+/**
+ * This class loads the mapper's SolrInputDocuments into one EmbeddedSolrServer
+ * per reducer. Each such reducer and Solr server can be seen as a (micro)
+ * shard. The Solr servers store their data in HDFS.
+ *
+ * More specifically, this class consumes a list of
+ * {@code <docId, SolrInputDocument>} pairs, sorted by docId, and sends them to
+ * an embedded Solr server to generate a Solr index shard from the documents.
+ */
+public class SolrReducer extends Reducer<Text, SolrInputDocumentWritable, Text, SolrInputDocumentWritable> {
+
+  private UpdateConflictResolver resolver;
+  private HeartBeater heartBeater;
+  private ExceptionHandler exceptionHandler;
+
+  /** Configuration key that names the UpdateConflictResolver implementation to use. */
+  public static final String UPDATE_CONFLICT_RESOLVER = SolrReducer.class.getName() + ".updateConflictResolver";
+
+  private static final Logger LOG = LoggerFactory.getLogger(SolrReducer.class);
+
+  /**
+   * Registers this reducer's context with SolrRecordWriter and creates the
+   * conflict resolver, the exception handler and the heart beater from the
+   * job configuration.
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    SolrRecordWriter.addReducerContext(context);
+    Class<? extends UpdateConflictResolver> resolverClass = context.getConfiguration().getClass(
+        UPDATE_CONFLICT_RESOLVER, RetainMostRecentUpdateConflictResolver.class, UpdateConflictResolver.class);
+
+    this.resolver = ReflectionUtils.newInstance(resolverClass, context.getConfiguration());
+    /*
+     * Note that ReflectionUtils.newInstance() above also implicitly calls
+     * resolver.configure(context.getConfiguration()) if the resolver
+     * implements org.apache.hadoop.conf.Configurable
+     */
+
+    this.exceptionHandler = new FaultTolerance(
+        context.getConfiguration().getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false),
+        context.getConfiguration().getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
+        context.getConfiguration().get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName()));
+
+    this.heartBeater = new HeartBeater(context);
+  }
+
+  /**
+   * Passes the updates for one key through the conflict resolver and then to
+   * the default reduce implementation. Any exception is logged, counted in
+   * the "errors" counter group and handed to the exception handler.
+   */
+  @Override
+  protected void reduce(Text key, Iterable<SolrInputDocumentWritable> values, Context context) throws IOException, InterruptedException {
+    heartBeater.needHeartBeat();
+    try {
+      values = resolve(key, values, context);
+      super.reduce(key, values, context);
+    } catch (Exception e) {
+      LOG.error("Unable to process key " + key, e);
+      context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1);
+      exceptionHandler.handleException(e, null);
+    } finally {
+      heartBeater.cancelHeartBeat();
+    }
+  }
+
+  /**
+   * Returns the values in the order chosen by the conflict resolver. The
+   * resolver works on plain SolrInputDocuments, so the values are unwrapped
+   * before and wrapped again after the call.
+   */
+  private Iterable<SolrInputDocumentWritable> resolve(
+      final Text key, final Iterable<SolrInputDocumentWritable> values, final Context context) {
+
+    if (resolver instanceof NoChangeUpdateConflictResolver) {
+      return values; // fast path
+    }
+    return new Iterable<SolrInputDocumentWritable>() {
+      @Override
+      public Iterator<SolrInputDocumentWritable> iterator() {
+        return new WrapIterator(resolver.orderUpdates(key, new UnwrapIterator(values.iterator()), context));
+      }
+    };
+  }
+
+  @Override
+  protected void cleanup(Context context) throws IOException, InterruptedException {
+    heartBeater.close();
+    super.cleanup(context);
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+
+  /** Wraps each SolrInputDocument of the parent iterator in a SolrInputDocumentWritable. */
+  private static final class WrapIterator implements Iterator<SolrInputDocumentWritable> {
+
+    private final Iterator<SolrInputDocument> parent;
+
+    private WrapIterator(Iterator<SolrInputDocument> parent) {
+      this.parent = parent;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return parent.hasNext();
+    }
+
+    @Override
+    public SolrInputDocumentWritable next() {
+      return new SolrInputDocumentWritable(parent.next());
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+
+  }
+
+  /** Extracts the SolrInputDocument from each SolrInputDocumentWritable of the parent iterator. */
+  private static final class UnwrapIterator implements Iterator<SolrInputDocument> {
+
+    private final Iterator<SolrInputDocumentWritable> parent;
+
+    private UnwrapIterator(Iterator<SolrInputDocumentWritable> parent) {
+      this.parent = parent;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return parent.hasNext();
+    }
+
+    @Override
+    public SolrInputDocument next() {
+      return parent.next().getSolrInputDocument();
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java
new file mode 100644
index 00000000000..d2efa96cdcf
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+
+import net.sourceforge.argparse4j.ArgumentParsers;
+import net.sourceforge.argparse4j.helper.ASCIITextWidthCounter;
+import net.sourceforge.argparse4j.helper.TextHelper;
+
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Reformats the text printed by
+ * {@link ToolRunner#printGenericCommandUsage(PrintStream)} so that it has the
+ * same look and feel that argparse4j uses for help text.
+ */
+class ToolRunnerHelpFormatter {
+
+  /**
+   * Returns the generic Hadoop command usage text, with every option line
+   * (a line starting with "-") rendered through argparse4j's help layout and
+   * every other line copied through followed by a newline.
+   */
+  public static String getGenericCommandUsage() {
+    final BufferedReader usageLines = new BufferedReader(new StringReader(captureGenericCommandUsage()));
+    final StringBuilder formatted = new StringBuilder();
+    try {
+      for (String line = usageLines.readLine(); line != null; line = usageLines.readLine()) {
+        formatted.append(formatLine(line));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e); // unreachable
+    }
+    return formatted.toString();
+  }
+
+  /** Captures what ToolRunner prints as its generic command usage, decoded as UTF-8. */
+  private static String captureGenericCommandUsage() {
+    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
+    try {
+      ToolRunner.printGenericCommandUsage(new PrintStream(sink, true, "UTF-8"));
+      return new String(sink.toByteArray(), "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e); // unreachable
+    }
+  }
+
+  /** Formats a single line of the usage text; the result includes its line terminator. */
+  private static String formatLine(String line) {
+    if (!line.startsWith("-")) {
+      return line + "\n";
+    }
+
+    final String option = line.trim();
+    // The option title ends at the first space or, failing that, the first tab.
+    int split = option.indexOf(" ");
+    if (split < 0) {
+      split = option.indexOf('\t');
+    }
+    if (split < 0) {
+      return option + "\n";
+    }
+
+    String title = option.substring(0, split).trim();
+    final boolean isWordOption = title.length() >= 3
+        && Character.isLetterOrDigit(title.charAt(1))
+        && Character.isLetterOrDigit(title.charAt(2));
+    if (isWordOption) {
+      title = "-" + title; // prefer "--libjars" long arg style over "-libjars" style but retain "-D foo" short arg style
+    }
+    final String help = option.substring(split).trim();
+
+    final StringWriter rendered = new StringWriter();
+    final PrintWriter out = new PrintWriter(rendered, true);
+    TextHelper.printHelp(out, title, help, new ASCIITextWidthCounter(), ArgumentParsers.getFormatWidth());
+    return rendered.toString();
+  }
+}
+
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java
new file mode 100644
index 00000000000..5e2fe86a6fe
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Identity-style mapper that emits each input value as the output key, paired
+ * with a {@link NullWritable} value. For the meat see
+ * {@link TreeMergeOutputFormat}.
+ */
+public class TreeMergeMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(TreeMergeMapper.class);
+
+  public static final String MAX_SEGMENTS_ON_TREE_MERGE = "maxSegmentsOnTreeMerge";
+
+  /**
+   * Writes {@code value} as the output key; the input key is only traced.
+   */
+  @Override
+  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+    LOGGER.trace("map key: {}, value: {}", key, value);
+    context.write(value, NullWritable.get());
+  }
+
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java
new file mode 100644
index 00000000000..26de0aaa42c
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
+import org.apache.lucene.misc.IndexMergeTool;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+import org.apache.solr.store.hdfs.HdfsDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Output format whose record writer gathers shard index paths and, when closed, merges those
+ * indexes into a single Lucene index under the task work directory. See {@link IndexMergeTool}.
+ */
+public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable> {
+  /** Applies the log4j configuration named in the job conf (if any), then builds the merging writer. */
+  @Override
+  public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
+    Utils.getLogConfigFile(context.getConfiguration());
+    Path workDir = getDefaultWorkFile(context, "");
+    return new TreeMergeRecordWriter(context, workDir);
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /** Remembers one source index path per record; all merging work is deferred to close(). */
+  private static final class TreeMergeRecordWriter extends RecordWriter<Text, NullWritable> {
+    private static final Logger LOG = LoggerFactory.getLogger(TreeMergeRecordWriter.class);
+    private final Path workDir;
+    private final List<Path> shards = new ArrayList<Path>();
+    private final HeartBeater heartBeater;
+    private final TaskAttemptContext context;
+
+    public TreeMergeRecordWriter(TaskAttemptContext context, Path workDir) {
+      this.workDir = new Path(workDir, "data/index");
+      this.heartBeater = new HeartBeater(context);
+      this.context = context;
+    }
+
+    /** Registers the path held in {@code key} as one more source index to merge on close. */
+    @Override
+    public void write(Text key, NullWritable value) {
+      LOG.info("map key: {}", key);
+      heartBeater.needHeartBeat();
+      try {
+        shards.add(new Path(key.toString()));
+      } finally {
+        heartBeater.cancelHeartBeat();
+      }
+    }
+
+    /** Merges the registered shards into workDir (addIndexes, optional forceMerge, close); rolls back on failure. */
+    @Override
+    public void close(TaskAttemptContext context) throws IOException {
+      LOG.debug("Merging into dstDir: " + workDir + ", srcDirs: {}", shards);
+      heartBeater.needHeartBeat();
+      IndexWriter writer = null; // non-null only while an opened writer still has to be released
+      try {
+        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
+        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
+            .setOpenMode(OpenMode.CREATE);
+        // TODO: grab tuned MergePolicy and MergeScheduler from solrconfig.xml? (.setMergePolicy(mergePolicy), .setMergeScheduler(...))
+        if (LOG.isDebugEnabled()) {
+          writerConfig.setInfoStream(System.out);
+        }
+        // writerConfig.setRAMBufferSizeMB(100); // improve performance
+        // writerConfig.setMaxThreadStates(1);
+
+        // disable compound file to improve performance; also see defaults in SolrIndexConfig and
+        // http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
+        MergePolicy mergePolicy = writerConfig.getMergePolicy();
+        LOG.debug("mergePolicy was: {}", mergePolicy);
+        if (mergePolicy instanceof TieredMergePolicy) {
+          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
+          // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
+          // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
+          // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
+        } else if (mergePolicy instanceof LogMergePolicy) {
+          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
+        }
+        LOG.info("Using mergePolicy: {}", mergePolicy);
+
+        writer = new IndexWriter(mergedIndex, writerConfig);
+        Directory[] indexes = new Directory[shards.size()];
+        for (int i = 0; i < shards.size(); i++) {
+          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
+        }
+        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
+        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
+        long start = System.currentTimeMillis();
+        writer.addIndexes(indexes);
+        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename)
+        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
+        // See https://issues.apache.org/jira/browse/LUCENE-4746
+        if (LOG.isDebugEnabled()) {
+          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start);
+        }
+        float secs = (System.currentTimeMillis() - start) / 1000.0f;
+        LOG.info("Logical merge took {} secs", secs);
+        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
+        context.setStatus("Optimizing Solr: forcing tree merge down to " + maxSegments + " segments");
+        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
+        start = System.currentTimeMillis();
+        if (maxSegments < Integer.MAX_VALUE) {
+          writer.forceMerge(maxSegments);
+          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
+          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
+        }
+        if (LOG.isDebugEnabled()) {
+          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start);
+        }
+        secs = (System.currentTimeMillis() - start) / 1000.0f;
+        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
+
+        start = System.currentTimeMillis();
+        LOG.info("Optimizing Solr: Closing index writer");
+        writer.close();
+        writer = null; // closed cleanly, so there is nothing left to roll back
+        secs = (System.currentTimeMillis() - start) / 1000.0f;
+        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
+        context.setStatus("Done");
+      } finally {
+        if (writer != null) { // merge failed part-way: discard pending changes and release the writer
+          try {
+            writer.rollback();
+          } catch (IOException e) {
+            LOG.warn("Cannot roll back index writer after failed tree merge", e);
+          }
+        }
+        heartBeater.cancelHeartBeat();
+        heartBeater.close();
+      }
+    }
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java
new file mode 100644
index 00000000000..1ad141a4264
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.hadoop;
+
+import java.io.BufferedReader;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+/**
+ * Presents a {@link DataInput} through Solr's DataInputInputStream API by forwarding every call to
+ * a {@link DataInputStream} wrapped around it; this class adds no read-ahead buffer of its own.
+ */
+public class UnbufferedDataInputInputStream extends org.apache.solr.common.util.DataInputInputStream {
+  private final DataInputStream in;
+
+  /** Wraps whatever stream DataInputInputStream.constructInputStream builds for {@code in}. */
+  public UnbufferedDataInputInputStream(DataInput in) {
+    this.in = new DataInputStream(DataInputInputStream.constructInputStream(in));
+  }
+
+  /** Fills all of {@code b}; forwarded unchanged to the wrapped stream. */
+  @Override
+  public void readFully(byte[] b) throws IOException {
+    in.readFully(b);
+  }
+
+  /** Fills {@code len} bytes of {@code b} starting at {@code off}; forwarded unchanged. */
+  @Override
+  public void readFully(byte[] b, int off, int len) throws IOException {
+    in.readFully(b, off, len);
+  }
+
+  /** Forwarded unchanged; returns whatever the wrapped stream reports as skipped. */
+  @Override
+  public int skipBytes(int n) throws IOException {
+    return in.skipBytes(n);
+  }
+
+  // Fixed-width primitive reads: each one is a direct pass-through to the wrapped stream.
+  @Override
+  public boolean readBoolean() throws IOException { return in.readBoolean(); }
+  @Override
+  public byte readByte() throws IOException { return in.readByte(); }
+  @Override
+  public int readUnsignedByte() throws IOException { return in.readUnsignedByte(); }
+  @Override
+  public short readShort() throws IOException { return in.readShort(); }
+  @Override
+  public int readUnsignedShort() throws IOException { return in.readUnsignedShort(); }
+  @Override
+  public char readChar() throws IOException { return in.readChar(); }
+  @Override
+  public int readInt() throws IOException { return in.readInt(); }
+  @Override
+  public long readLong() throws IOException { return in.readLong(); }
+  @Override
+  public float readFloat() throws IOException { return in.readFloat(); }
+  @Override
+  public double readDouble() throws IOException { return in.readDouble(); }
+
+  /**
+   * Reads one line as UTF-8, consuming bytes only up to and including the terminating '\n'.
+   * A '\r' directly before the '\n' is dropped; a lone '\r' is NOT treated as a line end.
+   * Returns null when the stream is already exhausted.
+   */
+  @Override
+  public String readLine() throws IOException {
+    int b = in.read();
+    if (b < 0) {
+      return null;
+    }
+    // Read byte by byte: a BufferedReader would read ahead and swallow data meant for later calls.
+    java.io.ByteArrayOutputStream line = new java.io.ByteArrayOutputStream();
+    while (b >= 0 && b != '\n') {
+      line.write(b);
+      b = in.read();
+    }
+    byte[] bytes = line.toByteArray();
+    int length = bytes.length;
+    if (length > 0 && bytes[length - 1] == '\r') {
+      length--;
+    }
+    return new String(bytes, 0, length, "UTF-8");
+  }
+
+  /** Forwarded unchanged to the wrapped stream. */
+  @Override
+  public String readUTF() throws IOException {
+    return in.readUTF();
+  }
+
+  /** Reads a single byte, or returns -1 at end of stream, as reported by the wrapped stream. */
+  @Override
+  public int read() throws IOException {
+    return in.read();
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java
new file mode 100644
index 00000000000..c20d5784c0d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.File;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.PropertyConfigurator;
+
+import com.google.common.annotations.Beta;
+
+
+@Beta
+public final class Utils {
+  private static final String LOG_CONFIG_FILE = "hadoop.log4j.configuration"; // conf key for the log4j file name
+
+  /** Stores the simple name of {@code file} (its directory part is dropped) under the log config key. */
+  public static void setLogConfigFile(File file, Configuration conf) {
+    conf.set(LOG_CONFIG_FILE, file.getName());
+  }
+
+  /** Configures log4j from the file named under the log config key; does nothing when the key is unset. */
+  public static void getLogConfigFile(Configuration conf) {
+    String configuredFile = conf.get(LOG_CONFIG_FILE);
+    if (configuredFile != null) {
+      PropertyConfigurator.configure(configuredFile);
+    }
+  }
+
+  public static String getShortClassName(Class clazz) {
+    return getShortClassName(clazz.getName());
+  }
+
+  /** Returns what follows the last '.' or '$' in {@code className}, or all of it when neither occurs. */
+  public static String getShortClassName(String className) {
+    int separator = Math.max(className.lastIndexOf('.'), className.lastIndexOf('$'));
+    return className.substring(separator + 1);
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java
new file mode 100644
index 00000000000..ed916a33c93
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.hadoop;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.Aliases;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkCoreNodeProps;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Files;
+
+/**
+ * Extracts SolrCloud information from ZooKeeper.
+ */
+final class ZooKeeperInspector {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperInspector.class);
+
+  /** Returns, for each slice of the collection in slice-name order, the core URLs of all its replicas. */
+  public List<List<String>> extractShardUrls(String zkHost, String collection) {
+    DocCollection docCollection = extractDocCollection(zkHost, collection);
+    List<Slice> slices = getSortedSlices(docCollection.getSlices());
+    List<List<String>> solrUrls = new ArrayList<List<String>>(slices.size());
+    for (Slice slice : slices) {
+      if (slice.getLeader() == null) {
+        throw new IllegalArgumentException("Cannot find SolrCloud slice leader. " +
+            "It looks like not all of your shards are registered in ZooKeeper yet");
+      }
+      Collection<Replica> replicas = slice.getReplicas();
+      List<String> urls = new ArrayList<String>(replicas.size());
+      for (Replica replica : replicas) {
+        ZkCoreNodeProps props = new ZkCoreNodeProps(replica);
+        urls.add(props.getCoreUrl());
+      }
+      solrUrls.add(urls);
+    }
+    return solrUrls;
+  }
+
+  /** Resolves {@code collection} (alias or name) to its DocCollection using a client that is closed before returning. */
+  public DocCollection extractDocCollection(String zkHost, String collection) {
+    if (collection == null) {
+      throw new IllegalArgumentException("collection must not be null");
+    }
+    SolrZkClient zkClient = getZkClient(zkHost);
+    try {
+      ZkStateReader zkStateReader = new ZkStateReader(zkClient);
+      try {
+        // first check for alias
+        collection = checkForAlias(zkClient, collection);
+        zkStateReader.createClusterStateWatchersAndUpdate();
+      } catch (Exception e) {
+        if (e instanceof InterruptedException) {
+          Thread.currentThread().interrupt(); // do not swallow the interrupt while translating the failure
+        }
+        throw new IllegalArgumentException("Cannot find expected information for SolrCloud in ZooKeeper: " + zkHost, e);
+      }
+
+      try {
+        return zkStateReader.getClusterState().getCollection(collection);
+      } catch (SolrException e) {
+        throw new IllegalArgumentException("Cannot find collection '" + collection + "' in ZooKeeper: " + zkHost, e);
+      }
+    } finally {
+      zkClient.close();
+    }
+  }
+
+  /** Opens a new client for {@code zkHost}; this method does not close it, so the caller has to. */
+  public SolrZkClient getZkClient(String zkHost) {
+    if (zkHost == null) {
+      throw new IllegalArgumentException("zkHost must not be null");
+    }
+    try {
+      return new SolrZkClient(zkHost, 30000);
+    } catch (Exception e) {
+      throw new IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e);
+    }
+  }
+
+  /** Returns a new list holding {@code slices} sorted by slice name; the input collection is not modified. */
+  public List<Slice> getSortedSlices(Collection<Slice> slices) {
+    List<Slice> sorted = new ArrayList<Slice>(slices);
+    Collections.sort(sorted, new Comparator<Slice>() {
+      @Override
+      public int compare(Slice slice1, Slice slice2) {
+        return slice1.getName().compareTo(slice2.getName());
+      }
+    });
+    return sorted;
+  }
+
+  /**
+   * Returns the config name stored on the collection's ZooKeeper node (after alias resolution), or
+   * null if that node has no data. Borrowed heavily from Solr's ZKController.
+   */
+  public String readConfigName(SolrZkClient zkClient, String collection)
+      throws KeeperException, InterruptedException {
+    if (collection == null) {
+      throw new IllegalArgumentException("collection must not be null");
+    }
+    String configName = null;
+
+    // first check for alias
+    collection = checkForAlias(zkClient, collection);
+
+    String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
+    LOG.info("Load collection config from:" + path);
+    byte[] data = zkClient.getData(path, null, null, true);
+    if (data != null) {
+      ZkNodeProps props = ZkNodeProps.load(data);
+      configName = props.getStr(ZkController.CONFIGNAME_PROP);
+    }
+
+    if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) {
+      LOG.error("Specified config does not exist in ZooKeeper:" + configName);
+      throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:"
+          + configName);
+    }
+
+    return configName;
+  }
+
+  /** Maps an alias to the single collection it stands for; a name that is not an alias is returned as is. */
+  private String checkForAlias(SolrZkClient zkClient, String collection)
+      throws KeeperException, InterruptedException {
+    byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true);
+    Aliases aliases = ClusterState.load(aliasData);
+    String alias = aliases.getCollectionAlias(collection);
+    if (alias != null) {
+      List<String> aliasList = StrUtils.splitSmart(alias, ",", true);
+      if (aliasList.size() > 1) {
+        throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections");
+      }
+      collection = aliasList.get(0);
+    }
+    return collection;
+  }
+
+  /**
+   * Downloads the named config from ZK into a temp directory laid out as dir/conf plus dir/solr.xml; returns dir.
+   */
+  public File downloadConfigDir(SolrZkClient zkClient, String configName)
+      throws IOException, InterruptedException, KeeperException {
+    File dir = Files.createTempDir();
+    dir.deleteOnExit();
+    ZkController.downloadConfigDir(zkClient, configName, dir);
+    File confDir = new File(dir, "conf");
+    if (!confDir.isDirectory()) {
+      // create a temporary directory with "conf" subdir and mv the config in there. This is
+      // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g.
+      // conf/solrconfig.xml which is necessary for proper solr operation. This should work
+      // even if solrctl changes.
+      confDir = new File(Files.createTempDir().getAbsolutePath(), "conf");
+      confDir.getParentFile().deleteOnExit();
+      Files.move(dir, confDir);
+      dir = confDir.getParentFile();
+    }
+    // NOTE(review): solr.xml is written with a single space as its whole content - confirm this is intended.
+    FileUtils.writeStringToFile(new File(dir, "solr.xml"), " ", "UTF-8");
+    return dir;
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java
new file mode 100644
index 00000000000..0eae9405717
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * UpdateConflictResolver implementation that returns the solr documents in the
+ * same order as they are received on input, i.e. without change in order.
+ */
+public final class NoChangeUpdateConflictResolver implements UpdateConflictResolver {
+
+  /** Returns {@code updates} itself, untouched; {@code key} and {@code ctx} are not used. */
+  @Override
+  public Iterator<SolrInputDocument> orderUpdates(Text key, Iterator<SolrInputDocument> updates, Context ctx) {
+    return updates;
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java
new file mode 100644
index 00000000000..60efb4c15bb
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * UpdateConflictResolver implementation that rejects multiple documents with
+ * the same key with an exception.
+ */
+public final class RejectingUpdateConflictResolver implements UpdateConflictResolver {
+
+  /** Passes a lone update through; throws IllegalArgumentException as soon as a second one shows up. */
+  @Override
+  public Iterator<SolrInputDocument> orderUpdates(Text key, Iterator<SolrInputDocument> updates, Context ctx) {
+    SolrInputDocument firstUpdate = null;
+    while (updates.hasNext()) {
+      if (firstUpdate != null) { // a second document arrived for the same key
+        throw new IllegalArgumentException("Update conflict! Documents with the same unique key are forbidden: "
+            + key);
+      }
+      firstUpdate = updates.next();
+      assert firstUpdate != null;
+    }
+    assert firstUpdate != null;
+    return Collections.singletonList(firstUpdate).iterator();
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java
new file mode 100644
index 00000000000..1994c163dea
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.hadoop.HdfsFileFieldNames;
+import org.apache.solr.hadoop.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * UpdateConflictResolver implementation that ignores all but the most recent
+ * document version, based on a configurable numeric Solr field, which defaults
+ * to the file_last_modified timestamp.
+ */
+public class RetainMostRecentUpdateConflictResolver implements UpdateConflictResolver, Configurable {
+  private Configuration conf;
+  private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
+
+  public static final String ORDER_BY_FIELD_NAME_KEY =
+      RetainMostRecentUpdateConflictResolver.class.getName() + ".orderByFieldName";
+  public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
+
+  public static final String COUNTER_GROUP = Utils.getShortClassName(RetainMostRecentUpdateConflictResolver.class);
+  public static final String DUPLICATES_COUNTER_NAME = "Number of documents ignored as duplicates";
+  public static final String OUTDATED_COUNTER_NAME = "Number of documents ignored as outdated";
+
+  private static final Logger LOG = LoggerFactory.getLogger(RetainMostRecentUpdateConflictResolver.class);
+
+  /** Keeps {@code conf} and reads the order-by field name from it; the current name stays if the key is unset. */
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  protected String getOrderByFieldName() {
+    return orderByFieldName;
+  }
+
+  /** Returns a one-element iterator holding the update that ranks highest by the order-by field. */
+  @Override
+  public Iterator<SolrInputDocument> orderUpdates(Text key, Iterator<SolrInputDocument> updates, Context ctx) {
+    return getMaximum(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator(), ctx);
+  }
+
+  /**
+   * Returns the most recent document among the colliding updates. Updates that compare equal to the
+   * current maximum count as duplicates, every other loser as outdated; non-zero tallies go to job counters.
+   */
+  protected Iterator<SolrInputDocument> getMaximum(Iterator<SolrInputDocument> updates, String fieldName,
+      Comparator child, Context context) {
+    SolrInputDocumentComparator comp = new SolrInputDocumentComparator(fieldName, child);
+    SolrInputDocument max = null;
+    long numDupes = 0;
+    long numOutdated = 0;
+    while (updates.hasNext()) {
+      SolrInputDocument next = updates.next();
+      assert next != null;
+      if (max == null) {
+        max = next;
+      } else {
+        int c = comp.compare(next, max);
+        if (c == 0) {
+          LOG.debug("Ignoring document version because it is a duplicate: {}", next);
+          numDupes++;
+        } else if (c > 0) {
+          LOG.debug("Ignoring document version because it is outdated: {}", max);
+          max = next;
+          numOutdated++;
+        } else {
+          LOG.debug("Ignoring document version because it is outdated: {}", next);
+          numOutdated++;
+        }
+      }
+    }
+    assert max != null;
+    if (numDupes > 0) {
+      context.getCounter(COUNTER_GROUP, DUPLICATES_COUNTER_NAME).increment(numDupes);
+    }
+    if (numOutdated > 0) {
+      context.getCounter(COUNTER_GROUP, OUTDATED_COUNTER_NAME).increment(numOutdated);
+    }
+    return Collections.singletonList(max).iterator();
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java
new file mode 100644
index 00000000000..e8cfdbb52e4
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Comparator;
+
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+
+/**
+ * Default mechanism of determining which of two Solr documents with the same key is the more
+ * recent version: the first value of one field in each document is handed to a child comparator.
+ */
+public final class SolrInputDocumentComparator implements Comparator<SolrInputDocument> {
+
+  private final Comparator child;
+  private final String fieldName;
+
+  SolrInputDocumentComparator(String fieldName, Comparator child) {
+    this.child = child;
+    this.fieldName = fieldName;
+  }
+
+  /** Orders by the first value of the configured field; a document lacking the field ranks below one that has it. */
+  @Override
+  public int compare(SolrInputDocument doc1, SolrInputDocument doc2) {
+    SolrInputField f1 = doc1.getField(fieldName);
+    SolrInputField f2 = doc2.getField(fieldName);
+    if (f1 == f2) { // same field instance, or the field is missing from both documents
+      return 0;
+    }
+    if (f1 == null) {
+      return -1;
+    }
+    if (f2 == null) {
+      return 1;
+    }
+    return child.compare(f1.getFirstValue(), f2.getFirstValue());
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /** Compares timestamps given as Long, or as any object whose toString() parses as a long; null ranks lowest. */
+  public static final class TimeStampComparator implements Comparator {
+    @Override
+    public int compare(Object v1, Object v2) {
+      if (v1 == v2) {
+        return 0;
+      }
+      if (v1 == null) {
+        return -1;
+      }
+      if (v2 == null) {
+        return 1;
+      }
+      long t1 = getLong(v1);
+      long t2 = getLong(v2);
+      return (t1 < t2 ? -1 : (t1 == t2 ? 0 : 1));
+    }
+
+    /** Unboxes a Long directly; anything else goes through Long.parseLong(v.toString()). */
+    private long getLong(Object v) {
+      if (v instanceof Long) {
+        return ((Long) v).longValue();
+      }
+      return Long.parseLong(v.toString());
+    }
+  }
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
new file mode 100644
index 00000000000..24ea9363801
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.hadoop.HdfsFileFieldNames;
+
+/**
+ * UpdateConflictResolver that sorts colliding updates in ascending order, i.e.
+ * from the least recent to the most recent (partial) update. The order is
+ * derived from a configurable numeric Solr field; unless configured otherwise
+ * this is the file_last_modified timestamp.
+ */
+public class SortingUpdateConflictResolver implements UpdateConflictResolver, Configurable {
+
+  /** Configuration key that names the Solr field to order by. */
+  public static final String ORDER_BY_FIELD_NAME_KEY =
+      SortingUpdateConflictResolver.class.getName() + ".orderByFieldName";
+
+  /** Field used for ordering as long as the configuration does not name one. */
+  public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
+
+  private Configuration conf;
+  private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
+
+  /**
+   * Stores the configuration and reads the order-by field name from it. If the
+   * key is absent the field name currently in effect is kept.
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    String configuredName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
+    this.orderByFieldName = configuredName;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /** Returns the name of the Solr field that colliding updates are ordered by. */
+  protected String getOrderByFieldName() {
+    return orderByFieldName;
+  }
+
+  /** Sorts the colliding updates ascending by the timestamp held in the order-by field. */
+  @Override
+  public Iterator orderUpdates(Text key, Iterator updates, Context ctx) {
+    String fieldName = getOrderByFieldName();
+    Comparator timeStampOrder = new SolrInputDocumentComparator.TimeStampComparator();
+    return sort(updates, fieldName, timeStampOrder);
+  }
+
+  /**
+   * Buffers all updates in memory and returns them ordered by the first value
+   * of the given field, as compared by the child comparator.
+   */
+  protected Iterator sort(Iterator updates, String fieldName, Comparator child) {
+    // TODO: use an external merge sort in the pathological case where there are a huge amount of collisions
+    List buffer = new ArrayList(1);
+    for (Iterator remaining = updates; remaining.hasNext(); ) {
+      buffer.add(remaining.next());
+    }
+    boolean hasConflicts = buffer.size() > 1; // conflicts are rare
+    if (hasConflicts) {
+      Comparator documentOrder = new SolrInputDocumentComparator(fieldName, child);
+      Collections.sort(buffer, documentOrder);
+    }
+    return buffer.iterator();
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
new file mode 100644
index 00000000000..94e23e134eb
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.dedup;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.Reducer.Context;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * Interface that enables deduplication and ordering of a series of document
+ * updates for the same unique document key.
+ *
+ * For example, a MapReduce batch job might index multiple files in the same job
+ * where some of the files contain old and new versions of the very same
+ * document, using the same unique document key.
+ *
+ * Typically, implementations of this interface forbid collisions by throwing an
+ * exception, or ignore all but the most recent document version, or, in the
+ * general case, order colliding updates ascending from least recent to most
+ * recent (partial) update.
+ *
+ * The caller of this interface (i.e. the Hadoop Reducer) will then apply the
+ * updates to Solr in the order returned by the orderUpdates() method.
+ *
+ * Configuration: If an UpdateConflictResolver implementation also implements
+ * {@link Configurable} then the Hadoop Reducer will call
+ * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on
+ * instance construction and pass the standard Hadoop configuration information.
+ */
+public interface UpdateConflictResolver {
+
+  /**
+   * Given a list of all colliding document updates for the same unique document
+   * key, this method returns zero or more documents in an application specific
+   * order.
+   *
+   * The caller will then apply the updates for this key to Solr in the order
+   * returned by the orderUpdates() method.
+   *
+   * @param uniqueKey
+   *          the document key common to all collidingUpdates mentioned below
+   * @param collidingUpdates
+   *          all updates in the MapReduce job that have a key equal to
+   *          {@code uniqueKey} mentioned above. The input order is unspecified.
+   * @param context
+   *          The {@code Context} passed from the {@link Reducer}
+   *          implementations.
+   * @return the order in which the updates shall be applied to Solr
+   */
+  Iterator<SolrInputDocument> orderUpdates(
+      Text uniqueKey, Iterator<SolrInputDocument> collidingUpdates, Context context);
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html
new file mode 100644
index 00000000000..5543f0262be
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Dedupe related code.
+
+
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
new file mode 100644
index 00000000000..5ba98ff3968
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.morphline;
+
+import org.apache.solr.hadoop.Utils;
+
+/**
+ * Counters reported while files are processed by the morphline mapper. The
+ * text returned by {@link #toString()} is used as the counter name; it consists
+ * of the short class name of {@link MorphlineMapper}, a colon and a description.
+ */
+public enum MorphlineCounters {
+
+  FILES_READ("Number of files read"),
+
+  FILE_BYTES_READ("Number of file bytes read"),
+
+  DOCS_READ("Number of documents read"),
+
+  PARSER_OUTPUT_BYTES("Number of document bytes generated by Tika parser"),
+
+  ERRORS("Number of errors");
+
+  /** Complete counter name, i.e. mapper class prefix plus description. */
+  private final String label;
+
+  /**
+   * @param description
+   *          human readable description appended to the mapper class prefix
+   */
+  MorphlineCounters(String description) {
+    // The prefix is computed here rather than cached in a static field because
+    // enum constants are constructed before other static fields are initialized.
+    this.label = Utils.getShortClassName(MorphlineMapper.class) + ": " + description;
+  }
+
+  /** Returns the complete counter name. */
+  @Override
+  public String toString() {
+    return label;
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
new file mode 100644
index 00000000000..606ac05fd2e
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.morphline;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.hadoop.HdfsFileFieldNames;
+import org.apache.solr.hadoop.PathParts;
+import org.apache.solr.hadoop.Utils;
+import org.apache.solr.morphlines.solr.DocumentLoader;
+import org.apache.solr.morphlines.solr.SolrLocator;
+import org.apache.solr.morphlines.solr.SolrMorphlineContext;
+import org.apache.solr.schema.IndexSchema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.cloudera.cdk.morphline.api.Command;
+import com.cloudera.cdk.morphline.api.MorphlineCompilationException;
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+import com.cloudera.cdk.morphline.api.Record;
+import com.cloudera.cdk.morphline.base.Compiler;
+import com.cloudera.cdk.morphline.base.FaultTolerance;
+import com.cloudera.cdk.morphline.base.Fields;
+import com.cloudera.cdk.morphline.base.Metrics;
+import com.cloudera.cdk.morphline.base.Notifications;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.Timer;
+import com.google.common.annotations.Beta;
+import com.google.common.base.Joiner;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
+/**
+ * Internal helper for {@link MorphlineMapper} and dryRun mode; This API is for *INTERNAL* use only
+ * and should not be considered public.
+ * <p>
+ * The constructor compiles the configured morphline; {@link #map} feeds one
+ * file per call into it and {@link #cleanup()} commits and shuts it down.
+ */
+@Beta
+public final class MorphlineMapRunner {
+
+  private final MorphlineContext morphlineContext;
+  private final Command morphline;
+  private final IndexSchema schema;
+  /** Record fields forced from the command line, keyed by field name. */
+  private final Map<String, String> commandLineMorphlineHeaders;
+  private final boolean disableFileOpen;
+  /** "file@id" label of the morphline, used in log messages. */
+  private final String morphlineFileAndId;
+  private final Timer elapsedTime;
+
+  public static final String MORPHLINE_FILE_PARAM = "morphlineFile";
+  public static final String MORPHLINE_ID_PARAM = "morphlineId";
+
+  /**
+   * Morphline variables can be passed from the CLI to the Morphline, e.g.:
+   * hadoop ... -D morphlineVariable.zkHost=127.0.0.1:2181/solr
+   */
+  public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable";
+
+  /**
+   * Headers, including MIME types, can also explicitly be passed by force from the CLI to Morphline, e.g:
+   * hadoop ... -D morphlineField._attachment_mimetype=text/csv
+   */
+  public static final String MORPHLINE_FIELD_PREFIX = "morphlineField.";
+
+  /**
+   * Flag to disable reading of file contents if indexing just file metadata is sufficient.
+   * This improves performance and confidentiality.
+   */
+  public static final String DISABLE_FILE_OPEN = "morphlineDisableFileOpen";
+
+  private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapRunner.class);
+
+  MorphlineContext getMorphlineContext() {
+    return morphlineContext;
+  }
+
+  IndexSchema getSchema() {
+    return schema;
+  }
+
+  /**
+   * Builds the morphline context, loads the Solr schema from the given Solr
+   * home directory, compiles the morphline named by the configuration and
+   * begins a morphline transaction.
+   *
+   * @param configuration
+   *          supplies the morphline file and id, the morphline variables, the
+   *          forced record fields and the fault tolerance settings
+   * @param loader
+   *          receives the documents produced by the morphline
+   * @param solrHomeDir
+   *          Solr home directory handed to the SolrLocator to load the schema
+   * @throws MorphlineCompilationException
+   *           if the morphline file parameter is missing or blank
+   */
+  public MorphlineMapRunner(Configuration configuration, DocumentLoader loader, String solrHomeDir) throws IOException {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("CWD is {}", new File(".").getCanonicalPath());
+      // sorted copy so that the configuration dump is easy to read
+      TreeMap<String, String> map = new TreeMap<String, String>();
+      for (Map.Entry<String, String> entry : configuration) {
+        map.put(entry.getKey(), entry.getValue());
+      }
+      LOG.trace("Configuration:\n{}", Joiner.on("\n").join(map.entrySet()));
+    }
+
+    FaultTolerance faultTolerance = new FaultTolerance(
+        configuration.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false),
+        configuration.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
+        configuration.get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())
+        );
+
+    // Preliminary context without schema; it is only needed to construct the
+    // locator that loads the schema.
+    MorphlineContext bootstrapContext = new SolrMorphlineContext.Builder()
+        .setDocumentLoader(loader)
+        .setExceptionHandler(faultTolerance)
+        .setMetricRegistry(new MetricRegistry())
+        .build();
+
+    class MySolrLocator extends SolrLocator { // trick to access protected ctor
+      public MySolrLocator(MorphlineContext ctx) {
+        super(ctx);
+      }
+    }
+
+    SolrLocator locator = new MySolrLocator(bootstrapContext);
+    locator.setSolrHomeDir(solrHomeDir);
+    schema = locator.getIndexSchema();
+
+    // rebuild context, now with schema; the metric registry is carried over
+    morphlineContext = new SolrMorphlineContext.Builder()
+        .setIndexSchema(schema)
+        .setDocumentLoader(loader)
+        .setExceptionHandler(faultTolerance)
+        .setMetricRegistry(bootstrapContext.getMetricRegistry())
+        .build();
+
+    String morphlineFile = configuration.get(MORPHLINE_FILE_PARAM);
+    String morphlineId = configuration.get(MORPHLINE_ID_PARAM);
+    if (morphlineFile == null || morphlineFile.trim().length() == 0) {
+      throw new MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null);
+    }
+
+    // collect "morphlineVariable.<name>" entries, keyed by <name>
+    String variablePrefix = MORPHLINE_VARIABLE_PARAM + ".";
+    Map<String, String> morphlineVariables = new HashMap<String, String>();
+    for (Map.Entry<String, String> entry : configuration) {
+      if (entry.getKey().startsWith(variablePrefix)) {
+        morphlineVariables.put(entry.getKey().substring(variablePrefix.length()), entry.getValue());
+      }
+    }
+    Config override = ConfigFactory.parseMap(morphlineVariables);
+    morphline = new Compiler().compile(new File(morphlineFile), morphlineId, morphlineContext, null, override);
+    morphlineFileAndId = morphlineFile + "@" + morphlineId;
+
+    disableFileOpen = configuration.getBoolean(DISABLE_FILE_OPEN, false);
+    LOG.debug("disableFileOpen: {}", disableFileOpen);
+
+    // collect "morphlineField.<name>" entries, keyed by <name>
+    commandLineMorphlineHeaders = new HashMap<String, String>();
+    for (Map.Entry<String, String> entry : configuration) {
+      if (entry.getKey().startsWith(MORPHLINE_FIELD_PREFIX)) {
+        commandLineMorphlineHeaders.put(entry.getKey().substring(MORPHLINE_FIELD_PREFIX.length()), entry.getValue());
+      }
+    }
+    LOG.debug("Headers, including MIME types, passed by force from the CLI to morphline: {}", commandLineMorphlineHeaders);
+
+    String metricName = MetricRegistry.name(Utils.getShortClassName(getClass()), Metrics.ELAPSED_TIME);
+    this.elapsedTime = morphlineContext.getMetricRegistry().timer(metricName);
+    Notifications.notifyBeginTransaction(morphline);
+  }
+
+  /**
+   * Extract content from the path specified in the value. Key is useless.
+   * <p>
+   * Any exception raised while processing is logged, counted (if a context is
+   * given) and then passed to the exception handler of the morphline context.
+   *
+   * @param value
+   *          path of the file to process
+   * @param configuration
+   *          configuration used to resolve the path
+   * @param context
+   *          task context used to update counters; may be null, in which case
+   *          no counters are updated
+   */
+  public void map(String value, Configuration configuration, Context context) throws IOException {
+    LOG.info("Processing file {}", value);
+    InputStream in = null;
+    Record record = null;
+    Timer.Context timerContext = elapsedTime.time();
+    try {
+      PathParts parts = new PathParts(value.toString(), configuration);
+      record = getRecord(parts);
+      if (record == null) {
+        return; // ignore
+      }
+      // fields forced from the command line override the generated headers
+      for (Map.Entry<String, String> entry : commandLineMorphlineHeaders.entrySet()) {
+        record.replaceValues(entry.getKey(), entry.getValue());
+      }
+      long fileLength = parts.getFileStatus().getLen();
+      if (disableFileOpen) {
+        in = new ByteArrayInputStream(new byte[0]); // metadata only: empty body
+      } else {
+        in = new BufferedInputStream(parts.getFileSystem().open(parts.getUploadPath()));
+      }
+      record.put(Fields.ATTACHMENT_BODY, in);
+      Notifications.notifyStartSession(morphline);
+      if (!morphline.process(record)) {
+        LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record);
+      }
+      if (context != null) {
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILES_READ.toString()).increment(1);
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILE_BYTES_READ.toString()).increment(fileLength);
+      }
+    } catch (Exception e) {
+      LOG.error("Unable to process file " + value, e);
+      if (context != null) {
+        // one counter per exception class
+        context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1);
+      }
+      morphlineContext.getExceptionHandler().handleException(e, record);
+    } finally {
+      timerContext.stop();
+      if (in != null) {
+        in.close();
+      }
+    }
+  }
+
+  /**
+   * Creates the initial morphline record for a file, filled with the document
+   * id, the attachment name and the file metadata fields.
+   *
+   * @return the record, or null if the file status cannot be obtained (the
+   *         file is then skipped by {@link #map})
+   */
+  protected Record getRecord(PathParts parts) {
+    FileStatus stats;
+    try {
+      stats = parts.getFileStatus();
+    } catch (IOException e) {
+      stats = null;
+    }
+    if (stats == null) {
+      LOG.warn("Ignoring file that somehow has become unavailable since the job was submitted: {}",
+          parts.getUploadURL());
+      return null;
+    }
+
+    Record headers = new Record();
+    //headers.put(getSchema().getUniqueKeyField().getName(), parts.getId()); // use HDFS file path as docId if no docId is specified
+    headers.put(Fields.BASE_ID, parts.getId()); // with sanitizeUniqueKey command, use HDFS file path as docId if no docId is specified
+    headers.put(Fields.ATTACHMENT_NAME, parts.getName()); // Tika can use the file name in guessing the right MIME type
+
+    // enable indexing and storing of file meta data in Solr
+    headers.put(HdfsFileFieldNames.FILE_UPLOAD_URL, parts.getUploadURL());
+    headers.put(HdfsFileFieldNames.FILE_DOWNLOAD_URL, parts.getDownloadURL());
+    headers.put(HdfsFileFieldNames.FILE_SCHEME, parts.getScheme());
+    headers.put(HdfsFileFieldNames.FILE_HOST, parts.getHost());
+    headers.put(HdfsFileFieldNames.FILE_PORT, String.valueOf(parts.getPort()));
+    headers.put(HdfsFileFieldNames.FILE_PATH, parts.getURIPath());
+    headers.put(HdfsFileFieldNames.FILE_NAME, parts.getName());
+    headers.put(HdfsFileFieldNames.FILE_LAST_MODIFIED, String.valueOf(stats.getModificationTime())); // FIXME also add in SpoolDirectorySource
+    headers.put(HdfsFileFieldNames.FILE_LENGTH, String.valueOf(stats.getLen())); // FIXME also add in SpoolDirectorySource
+    headers.put(HdfsFileFieldNames.FILE_OWNER, stats.getOwner());
+    headers.put(HdfsFileFieldNames.FILE_GROUP, stats.getGroup());
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_USER, stats.getPermission().getUserAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_GROUP, stats.getPermission().getGroupAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_OTHER, stats.getPermission().getOtherAction().SYMBOL);
+    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_STICKYBIT, String.valueOf(stats.getPermission().getStickyBit()));
+    // TODO: consider to add stats.getAccessTime(), stats.getReplication(), stats.isSymlink(), stats.getBlockSize()
+
+    return headers;
+  }
+
+  /** Commits the morphline transaction begun by the constructor and shuts the morphline down. */
+  public void cleanup() {
+    Notifications.notifyCommitTransaction(morphline);
+    Notifications.notifyShutdown(morphline);
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
new file mode 100644
index 00000000000..8ded6041547
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.morphline;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.SolrPingResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.hadoop.HeartBeater;
+import org.apache.solr.hadoop.SolrInputDocumentWritable;
+import org.apache.solr.hadoop.SolrMapper;
+import org.apache.solr.morphlines.solr.DocumentLoader;
+import org.apache.solr.schema.IndexSchema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Counting;
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Meter;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.Timer;
+
+/**
+ * This class takes the input files, extracts the relevant content, transforms
+ * it and hands SolrInputDocuments to a set of reducers.
+ *
+ * More specifically, it consumes a list of {@code <offset, hdfsFilePath>} input pairs.
+ * For each such pair extracts a set of zero or more SolrInputDocuments and
+ * sends them to a downstream Reducer. The key for the reducer is the unique id
+ * of the SolrInputDocument specified in Solr schema.xml.
+ */
+public class MorphlineMapper extends SolrMapper<LongWritable, Text> {
+
+  private Context context;
+  private MorphlineMapRunner runner;
+  private HeartBeater heartBeater;
+
+  private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapper.class);
+
+  protected IndexSchema getSchema() {
+    return runner.getSchema();
+  }
+
+  protected Context getContext() {
+    return context;
+  }
+
+  /**
+   * Remembers the task context and creates the heart beater and the morphline
+   * runner. Documents emitted by the morphline are written to the context.
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    this.context = context;
+    heartBeater = new HeartBeater(context);
+    this.runner = new MorphlineMapRunner(
+        context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString());
+  }
+
+  /**
+   * Extract content from the path specified in the value. Key is useless.
+   */
+  @Override
+  public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+    heartBeater.needHeartBeat();
+    try {
+      runner.map(value.toString(), context.getConfiguration(), context);
+    } finally {
+      heartBeater.cancelHeartBeat();
+    }
+  }
+
+  /**
+   * Closes the heart beater, shuts the morphline down and copies the morphline
+   * metrics into counters of the task context.
+   */
+  @Override
+  protected void cleanup(Context context) throws IOException, InterruptedException {
+    heartBeater.close();
+    runner.cleanup();
+    addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry());
+    super.cleanup(context);
+  }
+
+  /** Adds the count of every counter, histogram, meter and timer to the "morphline" counter group. */
+  private void addMetricsToMRCounters(MetricRegistry metricRegistry) {
+    for (Map.Entry<String, Counter> entry : metricRegistry.getCounters().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry<String, Histogram> entry : metricRegistry.getHistograms().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry<String, Meter> entry : metricRegistry.getMeters().entrySet()) {
+      addCounting(entry.getKey(), entry.getValue(), 1);
+    }
+    for (Map.Entry<String, Timer> entry : metricRegistry.getTimers().entrySet()) {
+      // NOTE(review): the value divided here is getCount(); confirm that it is
+      // a duration in nanoseconds rather than a number of events before
+      // relying on this scaling.
+      long nanosPerMilliSec = 1000 * 1000;
+      addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec);
+    }
+  }
+
+  /** Increments the counter named after the metric by the metric's count divided by scale. */
+  private void addCounting(String metricName, Counting value, long scale) {
+    context.getCounter("morphline", metricName).increment(value.getCount() / scale);
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * DocumentLoader that writes each document to the task context, keyed by the
+   * document's unique key. The transaction related methods do nothing.
+   */
+  private final class MyDocumentLoader implements DocumentLoader {
+
+    @Override
+    public void beginTransaction() {
+    }
+
+    /**
+     * Writes the document to the task context and updates the document counters.
+     *
+     * @throws IllegalArgumentException
+     *           if the document has no value for the unique key field
+     * @throws IOException
+     *           if the write fails or is interrupted
+     */
+    @Override
+    public void load(SolrInputDocument doc) throws IOException, SolrServerException {
+      String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName();
+      Object id = doc.getFieldValue(uniqueKeyFieldName);
+      if (id == null) {
+        throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName
+            + " (see Solr schema.xml)");
+      }
+      try {
+        context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc));
+      } catch (InterruptedException e) {
+        throw new IOException("Interrupted while writing " + doc, e);
+      }
+
+      // the size estimate is only computed when debug logging is enabled
+      if (LOG.isDebugEnabled()) {
+        long numParserOutputBytes = 0;
+        for (SolrInputField field : doc.values()) {
+          numParserOutputBytes += sizeOf(field.getValue());
+        }
+        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes);
+      }
+      context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1);
+    }
+
+    // just an approximation
+    private long sizeOf(Object value) {
+      if (value instanceof CharSequence) {
+        return ((CharSequence) value).length();
+      } else if (value instanceof Integer) {
+        return 4;
+      } else if (value instanceof Long) {
+        return 8;
+      } else if (value instanceof Collection) {
+        long size = 0;
+        for (Object val : (Collection<?>) value) {
+          size += sizeOf(val);
+        }
+        return size;
+      } else {
+        return String.valueOf(value).length();
+      }
+    }
+
+    @Override
+    public void commitTransaction() {
+    }
+
+    @Override
+    public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
+      return new UpdateResponse();
+    }
+
+    @Override
+    public void shutdown() {
+    }
+
+    @Override
+    public SolrPingResponse ping() throws SolrServerException, IOException {
+      return new SolrPingResponse();
+    }
+
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html
new file mode 100644
index 00000000000..9597a15d4f5
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Morphlines related code.
+
+
diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html
new file mode 100644
index 00000000000..c90c7a24775
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+{@link org.apache.solr.hadoop.MapReduceIndexerTool} and related code.
+
+
diff --git a/solr/contrib/solr-mr/src/java/overview.html b/solr/contrib/solr-mr/src/java/overview.html
new file mode 100644
index 00000000000..c97f378ca2e
--- /dev/null
+++ b/solr/contrib/solr-mr/src/java/overview.html
@@ -0,0 +1,21 @@
+
+
+
+Apache Solr Search Server: Solr MapReduce index building contrib
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml b/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml
new file mode 100644
index 00000000000..6891e42d616
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the Dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token whose part-of-speech tag exactly matches one of those defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よã†(ã ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The ãã†ã ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of ãã†ã (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè©ž 引用文å—列 ("noun quotation")
+# is ã„ã‚ã ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ã™ã‚‹, ã , etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person's speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+Ï€Ïοσ
+με
+σε
+ωσ
+παÏα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard English stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sà | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | habÃa from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mà | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | my
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estarÃa
+estarÃas
+estarÃamos
+estarÃais
+estarÃan
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habrÃa
+habrÃas
+habrÃamos
+habrÃais
+habrÃan
+habÃa
+habÃas
+habÃamos
+habÃais
+habÃan
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+serÃa
+serÃas
+serÃamos
+serÃais
+serÃan
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendrÃa
+tendrÃas
+tendrÃamos
+tendrÃais
+tendrÃan
+tenÃa
+tenÃas
+tenÃamos
+tenÃais
+tenÃan
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+ÙˆÚ¯Ùˆ
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+Ùˆ
+دو
+نخستين
+ولي
+چرا
+Ú†Ù‡
+وسط
+Ù‡
+كدام
+قابل
+يك
+رÙت
+Ù‡Ùت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرÙته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+ØÙ‚
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرÙت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+Ùقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استÙاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رÙته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+Ú¯Ùت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+Øدود
+مختلÙ
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تØت
+ضمن
+هستيم
+Ú¯Ùته
+Ùكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+Øتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطÙا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+Ùوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | or
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà  | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtÃ
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+nÃ
+nÃor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sÃ
+tar
+thar
+thú
+triúr
+trÃ
+trÃna
+trÃnár
+trÃocha
+tú
+um
+ár
+é
+éis
+Ã
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aÃnda
+alÃ
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquÃ
+ao
+aos
+as
+asÃ
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+habÃa
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अà¤à¥€
+आदि
+आप
+इतà¥à¤¯à¤¾à¤¦à¤¿
+इन
+इनका
+इनà¥à¤¹à¥€à¤‚
+इनà¥à¤¹à¥‡à¤‚
+इनà¥à¤¹à¥‹à¤‚
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उनà¥à¤¹à¥€à¤‚
+उनà¥à¤¹à¥‡à¤‚
+उनà¥à¤¹à¥‹à¤‚
+उस
+उसके
+उसी
+उसे
+à¤à¤•
+à¤à¤µà¤‚
+à¤à¤¸
+à¤à¤¸à¥‡
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किनà¥à¤¹à¥‡à¤‚
+किनà¥à¤¹à¥‹à¤‚
+किया
+किर
+किस
+किसी
+किसे
+की
+कà¥à¤›
+कà¥à¤²
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाà¤
+जा
+जितना
+जिन
+जिनà¥à¤¹à¥‡à¤‚
+जिनà¥à¤¹à¥‹à¤‚
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥‡à¤‚
+तिनà¥à¤¹à¥‹à¤‚
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दूसरे
+दो
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहà¥à¤¤
+बाद
+बाला
+बिलकà¥à¤²
+à¤à¥€
+à¤à¥€à¤¤à¤°
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाà¤
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लिये
+लेकिन
+व
+वरà¥à¤—
+वह
+वह
+वहाà¤
+वहीं
+वाले
+वà¥à¤¹
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सà¤à¥€
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+से
+सो
+ही
+हà¥à¤†
+हà¥à¤ˆ
+हà¥à¤
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सà¤à¤¿
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अà¤à¤¿
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+à¤à¤¸à¥‡
+रवासा
+कोन
+निचे
+काफि
+उसि
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हà¥à¤‡
+कोनसा
+इसकि
+दà¥à¤¸à¤°à¥‡
+जहां
+अप
+किंहों
+उनकि
+à¤à¤¿
+वरग
+हà¥à¤…
+जेसा
+नहिं
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amÃg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+Ãgy
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kÃvül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+mÃg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+Å‘
+Å‘k
+Å‘ket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö‚
+Õ¤Õ¸Ö‚Ö„
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö„
+Õ¥Õ½
+Õ¥Ö„
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö„
+Õ§Õ«Ö€
+Õ§Õ«Ö„
+Õ§Ö€
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö€
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö€
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö„
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö‡
+Õ¶Ö€Õ¡
+Õ¶Ö€Õ¡Õ¶Ö„
+Õ¸Ö€
+Õ¸Ö€Õ¨
+Õ¸Ö€Õ¸Õ¶Ö„
+Õ¸Ö€ÕºÕ¥Õ½
+Õ¸Ö‚
+Õ¸Ö‚Õ´
+ÕºÕ«Õ¿Õ«
+Õ¾Ö€Õ¡
+Ö‡
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrÃ
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarÃ
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farÃ
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starÃ
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflective)
+sjøl | self (reflective)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+å | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+я | i
+с | from
+со | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+все | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+ты | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+меня | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+если | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+вас | you accusative
+нибудь | indef. suffix preceded by hyphen
+опять | again
+уж | already, but homonym of `adder'
+вам | to you
+сказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+себя | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+есть | there is/are
+надо | got to, must
+ней | prepositional form of ей
+для | for
+мы | we
+тебя | thee
+их | them, their
+чем | than
+была | she was
+сам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+себе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifier particle `же'
+тогда | then
+кто | who
+этот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+этого | genitive form of `this'
+какой | which
+совсем | altogether
+ним | prepositional form of `его', `они'
+здесь | here
+этом | prepositional form of `этот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажется | it seems
+сейчас | now
+были | they were
+куда | where to
+зачем | why
+сказать | to say
+всех | all (acc., gen. preposn. plural)
+никогда | never
+сегодня | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+после | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+эти | these
+нас | us
+про | about
+всего | in all, only, of all
+них | prepositional form of `они' (they)
+какая | which, feminine
+много | lots
+разве | interrogative particle
+сказала | she said
+три | three
+эту | this, acc. fem. sing.
+моя | my, feminine
+впрочем | moreover, besides
+хорошо | good
+свою | ones own, acc. fem. sing.
+этой | oblique form of `эта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+нельзя | one must not
+такой | such a one
+им | to them
+более | more
+всегда | always
+конечно | of course
+всю | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | я меня мне мной [мною]
+ | ты тебя тебе тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее эи ею [нее, нэи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы нас нам нами
+ | вы вас вам вами
+ | они их им ими [них, ним, ними]
+ |
+ | себя себе собой [собою]
+ |
+ | demonstrative pronouns: этот (this), тот (that)
+ |
+ | этот эта это эти
+ | этого эты это эти
+ | этого этой этого этих
+ | этому этой этому этим
+ | этим этой этим [этою] этими
+ | этом этой этом этих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) весь (all)
+ |
+ | весь вся все все
+ | всего всю все все
+ | всего всей всего всех
+ | всему всей всему всем
+ | всем всей всем [всею] всеми
+ | всем всей всем всех
+ |
+ | (b) сам (himself etc)
+ |
+ | сам сама само сами
+ | самого саму само самих
+ | самого самой самого самих
+ | самому самой самому самим
+ | самим самой самим [самою] самими
+ | самом самой самом самих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв есть суть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзя
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml
new file mode 100644
index 00000000000..ae2c56d18ae
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml
@@ -0,0 +1,947 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml
new file mode 100644
index 00000000000..9d9178746cf
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml
@@ -0,0 +1,1764 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ textSpell
+
+
+
+
+
+ default
+ name
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,,
+ ,]]>
+ ]]>
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+
+ WORD
+
+
+ en
+ US
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text/plain; charset=UTF-8
+
+
+
+
+
+
+
+
+ 5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *:*
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt
new file mode 100644
index 00000000000..ae1e83eeb3d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt
new file mode 100644
index 00000000000..7f72128303b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml
new file mode 100644
index 00000000000..3a9c58afee8
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml
@@ -0,0 +1,67 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml
new file mode 100644
index 00000000000..25d5cebe4fb
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
new file mode 100644
index 00000000000..307a85f913d
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
new file mode 100644
index 00000000000..722db588333
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt
@@ -0,0 +1,9 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
new file mode 100644
index 00000000000..9ebe7fa349a
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt
new file mode 100644
index 00000000000..cac04095372
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
new file mode 100644
index 00000000000..4d2642cc5a3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
new file mode 100644
index 00000000000..441072971d3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt
@@ -0,0 +1,6 @@
+# Set of overrides for the dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
new file mode 100644
index 00000000000..71b750845e3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token whose part-of-speech tag exactly matches one of those defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©ž
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©ž-一般
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©ž-固有åè©ž
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©ž-固有åè©ž-一般
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©ž-固有åè©ž-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãŠå¸‚ã®æ–¹
+#åè©ž-固有åè©ž-人å-一般
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. 山田
+#åè©ž-固有åè©ž-人å-姓
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太郎
+#åè©ž-固有åè©ž-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. 通産çœ, NHK
+#åè©ž-固有åè©ž-組織
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©ž-固有åè©ž-地域
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. アジア, ãƒãƒ«ã‚»ãƒãƒŠ, 京都
+#åè©ž-固有åè©ž-地域-一般
+#
+# noun-proper-place-country: Country names.
+# e.g. 日本, オーストラリア
+#åè©ž-固有åè©ž-地域-国
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©ž-代åè©ž
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ
+#åè©ž-代åè©ž-一般
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃã‚
+#åè©ž-代åè©ž-縮約
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡
+#åè©ž-副詞å¯èƒ½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹)
+# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š
+#åè©ž-サ変接続
+#
+# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
+# e.g. å¥åº·, 安易, 駄目, ã ã‚
+#åè©ž-形容動詞語幹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
+# e.g. 0, 1, 2, 何, 数, 幾
+#åè©ž-æ•°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©ž-éžè‡ªç«‹
+#
+# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第,
+# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, çˆ, ã¯ãšã¿, å¼¾ã¿,
+# æ‹å, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳,
+# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/
+#åè©ž-éžè‡ªç«‹-一般
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ,
+# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最ä¸, ã•ãªã‹,
+# 最ä¸, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所,
+# ã¨ãŸã‚“, 途端, ãªã‹, ä¸, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾,
+# 儘, ä¾, ã¿ãŽã‚Š, 矢先
+#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½
+#
+# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
+# with the stem よう(だ) ("you(da)").
+# e.g. よã†, ã‚„ã†, 様 (よã†)
+#åè©ž-éžè‡ªç«‹-助動詞語幹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form な (aux "da").
+# e.g. ã¿ãŸã„, ãµã†
+#åè©ž-éžè‡ªç«‹-形容動詞語幹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©ž-特殊
+#
+# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
+# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã†
+#åè©ž-特殊-助動詞語幹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©ž-接尾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# 接尾語 ("suffix") and is usually the last element in a compound noun.
+# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿,
+# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å¦, é¡ž, é¢, 用
+#åè©ž-接尾-一般
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å›, 様, è‘—
+#åè©ž-接尾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. 町, 市, 県
+#åè©ž-接尾-地域
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before スル ("suru").
+# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„
+#åè©ž-接尾-サ変接続
+#
+# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
+# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã†
+#åè©ž-接尾-助動詞語幹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula だ ("da").
+# e.g. çš„, ã’, ãŒã¡
+#åè©ž-接尾-形容動詞語幹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, ä¸, 末, 上, 時 (ã˜)
+#åè©ž-接尾-副詞å¯èƒ½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, 時åŠ
+#åè©ž-接尾-助数詞
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã—) ã•, (考ãˆ) æ–¹
+#åè©ž-接尾-特殊
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
+#åè©ž-接続詞的
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
+# semantically verb-like.
+# e.g. ã”らん, ã”覧, 御覧, é ‚æˆ´
+#åè©ž-å‹•è©žéžè‡ªç«‹çš„
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
+# is いわく ("iwaku").
+#åè©ž-引用文å—列
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
+# behave like an adjective.
+# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„
+#åè©ž-ナイ形容詞語幹
+#
+#####
+# prefix: unclassified prefixes
+#接é è©ž
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾)
+#接é è©ž-å詞接続
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹.
+# e.g. ㊠(èªã¿ãªã•ã„), ㊠(座り)
+#接é è©ž-動詞接続
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ㊠(寒ã„ã§ã™ããˆ), ãƒã‚« (ã§ã‹ã„)
+#接é è©ž-形容詞接続
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´„, ãŠã‚ˆã, 毎時
+#接é è©ž-数接続
+#
+#####
+# verb: unclassified verbs
+#å‹•è©ž
+#
+# verb-main:
+#動詞-自立
+#
+# verb-auxiliary:
+#å‹•è©ž-éžè‡ªç«‹
+#
+# verb-suffix:
+#動詞-接尾
+#
+#####
+# adjective: unclassified adjectives
+#形容詞
+#
+# adjective-main:
+#形容詞-自立
+#
+# adjective-auxiliary:
+#形容詞-éžè‡ªç«‹
+#
+# adjective-suffix:
+#形容詞-接尾
+#
+#####
+# adverb: unclassified adverbs
+#副詞
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ã‚ã„ã‹ã‚らãš, 多分
+#副詞-一般
+#
+# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
+# な, する, だ, etc.
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚
+#副詞-助詞類接続
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†,
+# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ,
+# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, å ‚ã€…ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã
+#連体詞
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹
+接続詞
+#
+#####
+# particle: unclassified particles.
+助詞
+#
+# particle-case: case particles where the subclassification is undefined.
+助詞-æ ¼åŠ©è©ž
+#
+# particle-case-misc: Case particles.
+# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦
+助詞-æ ¼åŠ©è©ž-一般
+#
+# particle-case-quote: the "to" that appears after nouns, a person’s speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...)
+助詞-æ ¼åŠ©è©ž-引用
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦,
+# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹,
+# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—,
+# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦,
+# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹,
+# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹,
+# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ
+助詞-æ ¼åŠ©è©ž-連語
+#
+# particle-conjunctive:
+# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚,
+# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/,
+# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/
+助詞-接続助詞
+#
+# particle-dependency:
+# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž
+助詞-係助詞
+#
+# particle-adverbial:
+# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (å¦æ ¡) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/,
+# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/,
+# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«,
+# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/,
+# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-æ ¼åŠ©è©ž] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€)
+助詞-副助詞
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã‚„
+助詞-間投助詞
+#
+# particle-coordinate:
+# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら
+助詞-並立助詞
+#
+# particle-final:
+# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã, ãƒ,
+# ãã‡-å£èªž/, ããˆ-å£èªž/, ãã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/
+助詞-終助詞
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)ã€
+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, æ»è€…ã¯ã„ãªã‹ã£ãŸ.)ã€
+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)ã€
+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)ã€
+# e.g. ã‹
+助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+助詞-連体化
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+助詞-副詞化
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家)
+助詞-特殊
+#
+#####
+# auxiliary-verb:
+助動詞
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™,
+# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„
+#æ„Ÿå‹•è©ž
+#
+#####
+# symbol: unclassified Symbols.
+記å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [○◎@$〒→+]
+記å·-一般
+#
+# symbol-comma: Commas
+# e.g. [,ã€]
+記å·-èªç‚¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [..。]
+記å·-å¥ç‚¹
+#
+# symbol-space: Full-width whitespace.
+記å·-空白
+#
+# symbol-open_bracket:
+# e.g. [({‘“『ã€]
+記å·-括弧開
+#
+# symbol-close_bracket:
+# e.g. [)}’â€ã€ã€ã€‘]
+記å·-括弧閉
+#
+# symbol-alphabetic:
+#記å·-アルファベット
+#
+#####
+# other: unclassified other
+#ãã®ä»–
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã‚¡
+ãã®ä»–-間投
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨
+フィラー
+#
+#####
+# non-verbal: non-verbal sound.
+éžè¨€èªžéŸ³
+#
+#####
+# fragment:
+#語æ–片
+#
+#####
+# unknown: unknown part of speech.
+#未知語
+#
+##### End of file
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
new file mode 100644
index 00000000000..046829db6a2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+من
+ومن
+منها
+منه
+ÙÙŠ
+ÙˆÙÙŠ
+Ùيها
+Ùيه
+Ùˆ
+Ù
+ثم
+او
+أو
+ب
+بها
+به
+ا
+Ø£
+اى
+اي
+أي
+أى
+لا
+ولا
+الا
+ألا
+إلا
+لكن
+ما
+وما
+كما
+Ùما
+عن
+مع
+اذا
+إذا
+ان
+أن
+إن
+انها
+أنها
+إنها
+انه
+أنه
+إنه
+بان
+بأن
+Ùان
+Ùأن
+وان
+وأن
+وإن
+التى
+التي
+الذى
+الذي
+الذين
+الى
+الي
+إلى
+إلي
+على
+عليها
+عليه
+اما
+أما
+إما
+ايضا
+أيضا
+كل
+وكل
+لم
+ولم
+لن
+ولن
+هى
+هي
+هو
+وهى
+وهي
+وهو
+Ùهى
+Ùهي
+Ùهو
+انت
+أنت
+لك
+لها
+له
+هذه
+هذا
+تلك
+ذلك
+هناك
+كانت
+كان
+يكون
+تكون
+وكانت
+وكان
+غير
+بعض
+قد
+Ù†ØÙˆ
+بين
+بينما
+منذ
+ضمن
+Øيث
+الان
+الآن
+خلال
+بعد
+قبل
+Øتى
+عند
+عندما
+لدى
+جميع
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
new file mode 100644
index 00000000000..1ae4ba2ae38
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беше
+би
+бил
+била
+били
+било
+близо
+бъдат
+бъде
+бÑха
+в
+ваÑ
+ваш
+ваша
+вероÑтно
+вече
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑички
+вÑичко
+вÑÑка
+във
+въпреки
+върху
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докато
+докога
+дори
+доÑега
+доÑта
+е
+едва
+един
+ето
+за
+зад
+заедно
+заради
+заÑега
+затова
+защо
+защото
+и
+из
+или
+им
+има
+имат
+иÑка
+й
+каза
+как
+каква
+какво
+както
+какъв
+като
+кога
+когато
+което
+които
+кой
+който
+колко
+коÑто
+къде
+където
+към
+ли
+м
+ме
+между
+мен
+ми
+мнозина
+мога
+могат
+може
+молÑ
+момента
+му
+н
+на
+над
+назад
+най
+направи
+напред
+например
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+нито
+но
+нÑкои
+нÑкой
+нÑма
+обаче
+около
+оÑвен
+оÑобено
+от
+отгоре
+отново
+още
+пак
+по
+повече
+повечето
+под
+поне
+поради
+поÑле
+почти
+прави
+пред
+преди
+през
+при
+пък
+първо
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+Ñкоро
+Ñлед
+Ñме
+Ñпоред
+Ñред
+Ñрещу
+Ñте
+Ñъм
+ÑÑŠÑ
+Ñъщо
+Ñ‚
+тази
+така
+такива
+такъв
+там
+твой
+те
+тези
+ти
+тн
+то
+това
+тогава
+този
+той
+толкова
+точно
+Ñ‚Ñ€Ñбва
+тук
+тъй
+Ñ‚Ñ
+Ñ‚ÑÑ…
+у
+хареÑва
+ч
+че
+чеÑто
+чрез
+ще
+щом
+Ñ
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
new file mode 100644
index 00000000000..3da65deafe1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
new file mode 100644
index 00000000000..53c6097dac7
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅ™Ã
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+přes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+před
+pta
+jejich
+byl
+ještě
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+při
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpět
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅ™iÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ›
+mne
+jemu
+tomu
+těm
+těmu
+němu
+němuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
new file mode 100644
index 00000000000..a3ff5fe122c
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt
@@ -0,0 +1,108 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
new file mode 100644
index 00000000000..f7703841887
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt
@@ -0,0 +1,292 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
new file mode 100644
index 00000000000..232681f5bd6
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+Ï€Ïοσ
+με
+σε
+ωσ
+παÏα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
new file mode 100644
index 00000000000..2c164c0b2a1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
new file mode 100644
index 00000000000..2db14760075
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt
@@ -0,0 +1,354 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | but
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sà | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mà | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mÃo | mine
+mÃa |
+mÃos |
+mÃas |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estarÃa
+estarÃas
+estarÃamos
+estarÃais
+estarÃan
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habrÃa
+habrÃas
+habrÃamos
+habrÃais
+habrÃan
+habÃa
+habÃas
+habÃamos
+habÃais
+habÃan
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+serÃa
+serÃas
+serÃamos
+serÃais
+serÃan
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendrÃa
+tendrÃas
+tendrÃamos
+tendrÃais
+tendrÃan
+tenÃa
+tenÃas
+tenÃamos
+tenÃais
+tenÃan
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
new file mode 100644
index 00000000000..25f1db93460
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt
@@ -0,0 +1,99 @@
+# example set of Basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
new file mode 100644
index 00000000000..723641c6da7
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt
@@ -0,0 +1,313 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Note: by default this file is used after normalization, so when adding entries
+# to this file, use the Arabic 'ي' instead of 'ی'
+انان
+نداشته
+سراسر
+خياه
+ايشان
+وي
+تاكنون
+بيشتري
+دوم
+پس
+ناشي
+ÙˆÚ¯Ùˆ
+يا
+داشتند
+سپس
+هنگام
+هرگز
+پنج
+نشان
+امسال
+ديگر
+گروهي
+شدند
+چطور
+ده
+Ùˆ
+دو
+نخستين
+ولي
+چرا
+Ú†Ù‡
+وسط
+Ù‡
+كدام
+قابل
+يك
+رÙت
+Ù‡Ùت
+همچنين
+در
+هزار
+بله
+بلي
+شايد
+اما
+شناسي
+گرÙته
+دهد
+داشته
+دانست
+داشتن
+خواهيم
+ميليارد
+وقتيكه
+امد
+خواهد
+جز
+اورده
+شده
+بلكه
+خدمات
+شدن
+برخي
+نبود
+بسياري
+جلوگيري
+ØÙ‚
+كردند
+نوعي
+بعري
+نكرده
+نظير
+نبايد
+بوده
+بودن
+داد
+اورد
+هست
+جايي
+شود
+دنبال
+داده
+بايد
+سابق
+هيچ
+همان
+انجا
+كمتر
+كجاست
+گردد
+كسي
+تر
+مردم
+تان
+دادن
+بودند
+سري
+جدا
+ندارند
+مگر
+يكديگر
+دارد
+دهند
+بنابراين
+هنگامي
+سمت
+جا
+انچه
+خود
+دادند
+زياد
+دارند
+اثر
+بدون
+بهترين
+بيشتر
+البته
+به
+براساس
+بيرون
+كرد
+بعضي
+گرÙت
+توي
+اي
+ميليون
+او
+جريان
+تول
+بر
+مانند
+برابر
+باشيم
+مدتي
+گويند
+اكنون
+تا
+تنها
+جديد
+چند
+بي
+نشده
+كردن
+كردم
+گويد
+كرده
+كنيم
+نمي
+نزد
+روي
+قصد
+Ùقط
+بالاي
+ديگران
+اين
+ديروز
+توسط
+سوم
+ايم
+دانند
+سوي
+استÙاده
+شما
+كنار
+داريم
+ساخته
+طور
+امده
+رÙته
+نخست
+بيست
+نزديك
+طي
+كنيد
+از
+انها
+تمامي
+داشت
+يكي
+طريق
+اش
+چيست
+روب
+نمايد
+Ú¯Ùت
+چندين
+چيزي
+تواند
+ام
+ايا
+با
+ان
+ايد
+ترين
+اينكه
+ديگري
+راه
+هايي
+بروز
+همچنان
+پاعين
+كس
+Øدود
+مختلÙ
+مقابل
+چيز
+گيرد
+ندارد
+ضد
+همچون
+سازي
+شان
+مورد
+باره
+مرسي
+خويش
+برخوردار
+چون
+خارج
+شش
+هنوز
+تØت
+ضمن
+هستيم
+Ú¯Ùته
+Ùكر
+بسيار
+پيش
+براي
+روزهاي
+انكه
+نخواهد
+بالا
+كل
+وقتي
+كي
+چنين
+كه
+گيري
+نيست
+است
+كجا
+كند
+نيز
+يابد
+بندي
+Øتي
+توانند
+عقب
+خواست
+كنند
+بين
+تمام
+همه
+ما
+باشند
+مثل
+شد
+اري
+باشد
+اره
+طبق
+بعد
+اگر
+صورت
+غير
+جاي
+بيش
+ريزي
+اند
+زيرا
+چگونه
+بار
+لطÙا
+مي
+درباره
+من
+ديده
+همين
+گذاري
+برداري
+علت
+گذاشته
+هم
+Ùوق
+نه
+ها
+شوند
+اباد
+همواره
+هر
+اول
+خواهند
+چهار
+نام
+امروز
+مان
+هاي
+قبل
+كنم
+سعي
+تازه
+را
+هستند
+زير
+جلوي
+عنوان
+بود
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
new file mode 100644
index 00000000000..addad798c4b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
+minä minun minut minua minussa minusta minuun minulla minulta minulle | I
+sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
+hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
+me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
+te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
+he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
+
+tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
+tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
+se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
+nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
+nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
+ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
+
+kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
+mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
+mitkä | (pl)
+
+joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
+jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
+
+| conjunctions
+
+että | that
+ja | and
+jos | if
+koska | because
+kuin | than
+mutta | but
+niin | so
+sekä | and
+sillä | for
+tai | or
+vaan | but
+vai | or
+vaikka | although
+
+
+| prepositions
+
+kanssa | with
+mukaan | according to
+noin | about
+poikki | across
+yli | over, across
+
+| other
+
+kun | when
+niin | so
+nyt | now
+itse | self
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
new file mode 100644
index 00000000000..c00837ea939
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt
@@ -0,0 +1,183 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | where
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+Ã | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+celà  | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
new file mode 100644
index 00000000000..9ff88d747e5
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtÃ
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+nÃ
+nÃor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sÃ
+tar
+thar
+thú
+triúr
+trÃ
+trÃna
+trÃnár
+trÃocha
+tú
+um
+ár
+é
+éis
+Ã
+ó
+ón
+óna
+ónár
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
new file mode 100644
index 00000000000..d8760b12c14
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aÃnda
+alÃ
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquÃ
+ao
+aos
+as
+asÃ
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+habÃa
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
new file mode 100644
index 00000000000..86286bb083b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+अंदर
+अत
+अपना
+अपनी
+अपने
+अà¤à¥€
+आदि
+आप
+इतà¥à¤¯à¤¾à¤¦à¤¿
+इन
+इनका
+इनà¥à¤¹à¥€à¤‚
+इनà¥à¤¹à¥‡à¤‚
+इनà¥à¤¹à¥‹à¤‚
+इस
+इसका
+इसकी
+इसके
+इसमें
+इसी
+इसे
+उन
+उनका
+उनकी
+उनके
+उनको
+उनà¥à¤¹à¥€à¤‚
+उनà¥à¤¹à¥‡à¤‚
+उनà¥à¤¹à¥‹à¤‚
+उस
+उसके
+उसी
+उसे
+à¤à¤•
+à¤à¤µà¤‚
+à¤à¤¸
+à¤à¤¸à¥‡
+और
+कई
+कर
+करता
+करते
+करना
+करने
+करें
+कहते
+कहा
+का
+काफ़ी
+कि
+कितना
+किनà¥à¤¹à¥‡à¤‚
+किनà¥à¤¹à¥‹à¤‚
+किया
+किर
+किस
+किसी
+किसे
+की
+कà¥à¤›
+कà¥à¤²
+के
+को
+कोई
+कौन
+कौनसा
+गया
+घर
+जब
+जहाà¤
+जा
+जितना
+जिन
+जिनà¥à¤¹à¥‡à¤‚
+जिनà¥à¤¹à¥‹à¤‚
+जिस
+जिसे
+जीधर
+जैसा
+जैसे
+जो
+तक
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥‡à¤‚
+तिनà¥à¤¹à¥‹à¤‚
+तिस
+तिसे
+तो
+था
+थी
+थे
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दूसरे
+दो
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहीं
+ना
+निहायत
+नीचे
+ने
+पर
+पर
+पहले
+पूरा
+पे
+फिर
+बनी
+बही
+बहà¥à¤¤
+बाद
+बाला
+बिलकà¥à¤²
+à¤à¥€
+à¤à¥€à¤¤à¤°
+मगर
+मानो
+मे
+में
+यदि
+यह
+यहाà¤
+यही
+या
+यिह
+ये
+रखें
+रहा
+रहे
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लिये
+लेकिन
+व
+वरà¥à¤—
+वह
+वह
+वहाà¤
+वहीं
+वाले
+वà¥à¤¹
+वे
+वग़ैरह
+संग
+सकता
+सकते
+सबसे
+सà¤à¥€
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+से
+सो
+ही
+हà¥à¤†
+हà¥à¤ˆ
+हà¥à¤
+है
+हैं
+हो
+होता
+होती
+होते
+होना
+होने
+# additional normalized forms of the above
+अपनि
+जेसे
+होति
+सà¤à¤¿
+तिंहों
+इंहों
+दवारा
+इसि
+किंहें
+थि
+उंहों
+ओर
+जिंहें
+वहिं
+अà¤à¤¿
+बनि
+हि
+उंहिं
+उंहें
+हें
+वगेरह
+à¤à¤¸à¥‡
+रवासा
+कोन
+निचे
+काफि
+उसि
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हे
+बहि
+वहां
+कोइ
+यहां
+जिंहों
+तिंहें
+किसि
+कइ
+यहि
+इंहिं
+जिधर
+इंहें
+अदि
+इतयादि
+हà¥à¤‡
+कोनसा
+इसकि
+दà¥à¤¸à¤°à¥‡
+जहां
+अप
+किंहों
+उनकि
+à¤à¤¿
+वरग
+हà¥à¤…
+जेसा
+नहिं
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
new file mode 100644
index 00000000000..1a96f1db6f2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt
@@ -0,0 +1,209 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amÃg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+Ãgy
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kÃvül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+mÃg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+Å‘
+Å‘k
+Å‘ket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
new file mode 100644
index 00000000000..60c1c50fbc8
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö‚
+Õ¤Õ¸Ö‚Ö„
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö„
+Õ¥Õ½
+Õ¥Ö„
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö„
+Õ§Õ«Ö€
+Õ§Õ«Ö„
+Õ§Ö€
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö€
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö€
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö„
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö‡
+Õ¶Ö€Õ¡
+Õ¶Ö€Õ¡Õ¶Ö„
+Õ¸Ö€
+Õ¸Ö€Õ¨
+Õ¸Ö€Õ¸Õ¶Ö„
+Õ¸Ö€ÕºÕ¥Õ½
+Õ¸Ö‚
+Õ¸Ö‚Õ´
+ÕºÕ«Õ¿Õ«
+Õ¾Ö€Õ¡
+Ö‡
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
new file mode 100644
index 00000000000..4617f83a5c5
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
new file mode 100644
index 00000000000..4cb5b0891b1
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt
@@ -0,0 +1,301 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrÃ
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarÃ
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farÃ
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starÃ
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
new file mode 100644
index 00000000000..d4321be6b16
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã‚’
+ãŸ
+ãŒ
+ã§
+ã¦
+ã¨
+ã—
+れ
+ã•
+ã‚ã‚‹
+ã„ã‚‹
+ã‚‚
+ã™ã‚‹
+ã‹ã‚‰
+ãª
+ã“ã¨
+ã¨ã—ã¦
+ã„
+ã‚„
+れる
+ãªã©
+ãªã£
+ãªã„
+ã“ã®
+ãŸã‚
+ãã®
+ã‚ã£
+よã†
+ã¾ãŸ
+ã‚‚ã®
+ã¨ã„ã†
+ã‚ã‚Š
+ã¾ã§
+られ
+ãªã‚‹
+ã¸
+ã‹
+ã
+ã“ã‚Œ
+ã«ã‚ˆã£ã¦
+ã«ã‚ˆã‚Š
+ãŠã‚Š
+より
+ã«ã‚ˆã‚‹
+ãš
+ãªã‚Š
+られる
+ã«ãŠã„ã¦
+ã°
+ãªã‹ã£
+ãªã
+ã—ã‹ã—
+ã«ã¤ã„ã¦
+ã›
+ã ã£
+ãã®å¾Œ
+ã§ãã‚‹
+ãã‚Œ
+ã†
+ã®ã§
+ãªãŠ
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ãŠã‘ã‚‹
+ãŠã‚ˆã³
+ã„ã†
+ã•ã‚‰ã«
+ã§ã‚‚
+ら
+ãŸã‚Š
+ãã®ä»–
+ã«é–¢ã™ã‚‹
+ãŸã¡
+ã¾ã™
+ã‚“
+ãªã‚‰
+ã«å¯¾ã—ã¦
+特ã«
+ã›ã‚‹
+åŠã³
+ã“れら
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã‹
+ãªãŒã‚‰
+ã†ã¡
+ãã—ã¦
+ã¨ã¨ã‚‚ã«
+ãŸã ã—
+ã‹ã¤ã¦
+ãã‚Œãžã‚Œ
+ã¾ãŸã¯
+ãŠ
+ã»ã©
+ã‚‚ã®ã®
+ã«å¯¾ã™ã‚‹
+ã»ã¨ã‚“ã©
+ã¨å…±ã«
+ã¨ã„ã£ãŸ
+ã§ã™
+ã¨ã‚‚
+ã¨ã“ã‚
+ã“ã“
+##### End of file
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
new file mode 100644
index 00000000000..e21a23c06c3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dēļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pēc
+pie
+pirms
+pret
+priekš
+starp
+Å¡aipus
+uz
+viņpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomēr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdēļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamēr
+# Particles
+ar
+diezin
+droši
+diemžēl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄ“c
+nekÄ
+itin
+jÄ
+jau
+jel
+nē
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varÄ“jÄm
+varēšu
+varēsim
+var
+varēji
+varÄ“jÄt
+varēsi
+varēsiet
+varat
+varēja
+varēs
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
new file mode 100644
index 00000000000..f4d61f5092c
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt
@@ -0,0 +1,117 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
new file mode 100644
index 00000000000..e76f36e69ed
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflective)
+sjøl | self (reflective)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+Ã¥ | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
new file mode 100644
index 00000000000..276c1b446f2
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt
@@ -0,0 +1,251 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+Ã | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+Ã s | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houverÃamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+serÃamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tÃnhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+terÃamos
+teriam
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
new file mode 100644
index 00000000000..4fdee90a5ba
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceÅŸti
+aceÅŸtia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aÅŸ
+aÅŸadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deÅŸi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eÅŸti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+niÅŸte
+noastră
+noastre
+noi
+noÅŸtri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+ÅŸi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+Å£i
+Å£ie
+tine
+toată
+toate
+tot
+toţi
+totuÅŸi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voÅŸtri
+vostru
+vouă
+vreo
+vreun
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
new file mode 100644
index 00000000000..64307693457
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt
@@ -0,0 +1,241 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+что | what/that
+он | he
+на | on/onto
+Ñ | i
+Ñ | from
+Ñо | alternative form
+как | how
+а | milder form of `no' (but)
+то | conjunction and form of `that'
+вÑе | all
+она | she
+так | so, thus
+его | him
+но | but
+да | yes/and
+Ñ‚Ñ‹ | thou
+к | towards, by
+у | around, chez
+же | intensifier particle
+вы | you
+за | beyond, behind
+бы | conditional/subj. particle
+по | up to, along
+только | only
+ее | her
+мне | to me
+было | it was
+вот | here is/are, particle
+от | away from
+Ð¼ÐµÐ½Ñ | me
+еще | still, yet, more
+нет | no, there isnt/arent
+о | about
+из | out of
+ему | to him
+теперь | now
+когда | when
+даже | even
+ну | so, well
+вдруг | suddenly
+ли | interrogative particle
+еÑли | if
+уже | already, but homonym of `narrower'
+или | or
+ни | neither
+быть | to be
+был | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибудь | indef. suffix preceded by hyphen
+опÑÑ‚ÑŒ | again
+уж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+ведь | particle `after all'
+там | there
+потом | then
+ÑÐµÐ±Ñ | oneself
+ничего | nothing
+ей | to her
+может | usually with `быть' as `maybe'
+они | they
+тут | here
+где | where
+еÑÑ‚ÑŒ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+мы | we
+Ñ‚ÐµÐ±Ñ | thee
+их | them, their
+чем | than
+была | she was
+Ñам | self
+чтоб | in order to
+без | without
+будто | as if
+человек | man, person, one
+чего | genitive form of `what'
+раз | once
+тоже | also
+Ñебе | to oneself
+под | beneath
+жизнь | life
+будет | will be
+ж | short form of intensifer particle `же'
+тогда | then
+кто | who
+Ñтот | this
+говорил | was saying
+того | genitive form of `that'
+потому | for that reason
+Ñтого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑŒ | here
+Ñтом | prepositional form of `Ñтот'
+один | one
+почти | almost
+мой | my
+тем | instrumental/dative plural of `тот', `то'
+чтобы | full form of `in order that'
+нее | her (acc.)
+кажетÑÑ | it seems
+ÑÐµÐ¹Ñ‡Ð°Ñ | now
+были | they were
+куда | where to
+зачем | why
+Ñказать | to say
+вÑех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+при | by
+наконец | finally
+два | two
+об | alternative form of `о', about
+другой | another
+хоть | even
+поÑле | after
+над | above
+больше | more
+тот | that one (masc.)
+через | across, in
+Ñти | these
+Ð½Ð°Ñ | us
+про | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+разве | interrogative particle
+Ñказала | she said
+три | three
+Ñту | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впрочем | moreover, besides
+хорошо | good
+Ñвою | ones own, acc. fem. sing.
+Ñтой | oblique form of `Ñта', fem. `this'
+перед | in front of
+иногда | sometimes
+лучше | better
+чуть | a little
+том | preposn. form of `that one'
+Ð½ÐµÐ»ÑŒÐ·Ñ | one must not
+такой | such a one
+им | to them
+более | more
+вÑегда | always
+конечно | of course
+вÑÑŽ | acc. fem. sing of `all'
+между | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною]
+ | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою]
+ | он его ему им [него, нему, ним]
+ | она ее Ñи ею [нее, нÑи, нею]
+ | оно его ему им [него, нему, ним]
+ |
+ | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою]
+ |
+ | demonstrative pronouns: Ñтот (this), тот (that)
+ |
+ | Ñтот Ñта Ñто Ñти
+ | Ñтого ÑÑ‚Ñ‹ Ñто Ñти
+ | Ñтого Ñтой Ñтого Ñтих
+ | Ñтому Ñтой Ñтому Ñтим
+ | Ñтим Ñтой Ñтим [Ñтою] Ñтими
+ | Ñтом Ñтой Ñтом Ñтих
+ |
+ | тот та то те
+ | того ту то те
+ | того той того тех
+ | тому той тому тем
+ | тем той тем [тою] теми
+ | том той том тех
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑŒ (all)
+ |
+ | веÑÑŒ вÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑŽ вÑе вÑе
+ | вÑего вÑей вÑего вÑех
+ | вÑему вÑей вÑему вÑем
+ | вÑем вÑей вÑем [вÑею] вÑеми
+ | вÑем вÑей вÑем вÑех
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого Ñаму Ñамо Ñамих
+ | Ñамого Ñамой Ñамого Ñамих
+ | Ñамому Ñамой Ñамому Ñамим
+ | Ñамим Ñамой Ñамим [Ñамою] Ñамими
+ | Ñамом Ñамой Ñамом Ñамих
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | быть бы буд быв еÑÑ‚ÑŒ Ñуть
+ | име
+ | дел
+ | мог мож мочь
+ | уме
+ | хоч хот
+ | долж
+ | можн
+ | нужн
+ | нельзÑ
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
new file mode 100644
index 00000000000..22bddfd8cb3
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+Ã¥t | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
new file mode 100644
index 00000000000..07f0fabe692
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+à¹à¸«à¹ˆà¸‡
+à¹à¸¥à¹‰à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸šà¸š
+à¹à¸•à¹ˆ
+เà¸à¸‡
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่à¸
+เพื่à¸
+เพราะ
+เป็นà¸à¸²à¸£
+เป็น
+เปิดเผย
+เปิด
+เนื่à¸à¸‡à¸ˆà¸²à¸
+เดียวà¸à¸±à¸™
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+à¸à¸µà¸
+à¸à¸²à¸ˆ
+à¸à¸°à¹„ร
+à¸à¸à¸
+à¸à¸¢à¹ˆà¸²à¸‡
+à¸à¸¢à¸¹à¹ˆ
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลังจาà¸
+หลัง
+หรืà¸
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สà¹à¸²à¸«à¸£à¸±à¸š
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาà¸
+มา
+พร้à¸à¸¡
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นà¹à¸²
+นั้น
+นัà¸
+นà¸à¸à¸ˆà¸²à¸
+ทุà¸
+ที่สุด
+ที่
+ทà¹à¸²à¹ƒà¸«à¹‰
+ทà¹à¸²
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูà¸
+ถึง
+ต้à¸à¸‡
+ต่างๆ
+ต่าง
+ต่à¸
+ตาม
+ตั้งà¹à¸•à¹ˆ
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาà¸
+จัด
+จะ
+คืà¸
+ความ
+ครั้ง
+คง
+ขึ้น
+ขà¸à¸‡
+ขà¸
+ขณะ
+à¸à¹ˆà¸à¸™
+à¸à¹‡
+à¸à¸²à¸£
+à¸à¸±à¸š
+à¸à¸±à¸™
+à¸à¸§à¹ˆà¸²
+à¸à¸¥à¹ˆà¸²à¸§
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
new file mode 100644
index 00000000000..84d9408d4ea
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beÅŸ
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birÅŸey
+birÅŸeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+deÄŸil
+diÄŸer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eÄŸer
+elli
+en
+etmesi
+etti
+ettiÄŸi
+ettiÄŸini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+iÅŸte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduÄŸu
+olduÄŸunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+raÄŸmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+ÅŸey
+ÅŸeyden
+ÅŸeyi
+ÅŸeyler
+şöyle
+ÅŸu
+ÅŸuna
+ÅŸunda
+ÅŸundan
+şunları
+ÅŸunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiÅŸ
+yine
+yirmi
+yoksa
+yüz
+zaten
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
new file mode 100644
index 00000000000..6f0368e4d81
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž
+
+# Custom segmentation for compound katakana
+トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž
+
+# Custom reading for former sumo wrestler
+æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt
new file mode 100644
index 00000000000..1dfc0abecbf
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml
new file mode 100644
index 00000000000..65192efe442
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml
@@ -0,0 +1,961 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml
new file mode 100644
index 00000000000..beff1b2af0a
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml
@@ -0,0 +1,1784 @@
+
+
+
+
+
+
+
+
+ LUCENE_43
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+ ${solr.hdfs.home:}
+ ${solr.hdfs.confdir:}
+ ${solr.hdfs.blockcache.enabled:true}
+ ${solr.hdfs.blockcache.slab.count:1}
+ ${solr.hdfs.blockcache.direct.memory.allocation:true}
+ ${solr.hdfs.blockcache.blocksperbank:16384}
+ ${solr.hdfs.blockcache.read.enabled:true}
+ ${solr.hdfs.blockcache.write.enabled:true}
+ ${solr.hdfs.nrtcachingdirectory.enable:true}
+ ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16}
+ ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.maxIndexingThreads:8}
+
+
+
+
+
+ 128
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.lock.type:hdfs}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+
+ ${solr.autoCommit.maxTime:60000}
+ false
+
+
+
+
+ ${solr.autoSoftCommit.maxTime:1000}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 20
+
+
+ 200
+
+
+
+
+
+
+
+
+
+
+
+ static firstSearcher warming in solrconfig.xml
+
+
+
+
+
+ false
+
+
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ text
+
+
+
+
+
+
+
+ true
+ json
+ true
+
+
+
+
+
+
+
+ explicit
+
+
+ velocity
+ browse
+ layout
+ Solritas
+
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text
+ 100%
+ *:*
+ 10
+ *,score
+
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+
+ text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
+ 3
+
+
+ on
+ cat
+ manu_exact
+ content_type
+ author_s
+ ipod
+ GB
+ 1
+ cat,inStock
+ after
+ price
+ 0
+ 600
+ 50
+ popularity
+ 0
+ 10
+ 3
+ manufacturedate_dt
+ NOW/YEAR-10YEARS
+ NOW
+ +1YEAR
+ before
+ after
+
+
+ on
+ content features title name
+ html
+ <b>
+ </b>
+ 0
+ title
+ 0
+ name
+ 3
+ 200
+ content
+ 750
+
+
+ on
+ false
+ 5
+ 2
+ 5
+ true
+ true
+ 5
+ 3
+
+
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+ application/json
+
+
+
+
+ application/csv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ solrpingquery
+
+
+ all
+
+
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text_general
+
+
+
+
+
+ default
+ text
+ solr.DirectSolrSpellChecker
+
+ internal
+
+ 0.5
+
+ 2
+
+ 1
+
+ 5
+
+ 4
+
+ 0.01
+
+
+
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ name
+ true
+ true
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ default
+ wordbreak
+ on
+ true
+ 10
+ 5
+ 5
+ true
+ true
+ 10
+ 5
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+ text
+ true
+
+
+ tvComponent
+
+
+
+
+
+
+
+
+ default
+
+
+ org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+
+
+ 20
+
+
+ clustering/carrot2
+
+
+ ENGLISH
+
+
+ stc
+ org.carrot2.clustering.stc.STCClusteringAlgorithm
+
+
+
+
+
+
+ true
+ default
+ true
+
+ name
+ id
+
+ features
+
+ true
+
+
+
+ false
+
+ edismax
+
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+
+ *:*
+ 10
+ *,score
+
+
+ clustering
+
+
+
+
+
+
+
+
+
+ true
+ false
+
+
+ terms
+
+
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+ text
+
+
+ elevator
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+
+
+ 70
+
+ 0.5
+
+ [-\w ,/\n\"']{20,200}
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,,
+ ,,
+ ,,
+ ,