diff --git a/dev-tools/idea/.idea/ant.xml b/dev-tools/idea/.idea/ant.xml index e9f3c85bcb6..a8e73f6d0b5 100644 --- a/dev-tools/idea/.idea/ant.xml +++ b/dev-tools/idea/.idea/ant.xml @@ -45,6 +45,9 @@ + + + diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml new file mode 100644 index 00000000000..c1a0a6f0b0f --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_cell_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml new file mode 100644 index 00000000000..25770ed8c4d --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml new file mode 100644 index 00000000000..0ae745a64b8 --- /dev/null +++ b/dev-tools/idea/.idea/libraries/Solr_morphlines_core_test_library.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml index 58111cfec1b..b8c9fb4e569 100644 --- a/dev-tools/idea/.idea/modules.xml +++ b/dev-tools/idea/.idea/modules.xml @@ -49,6 +49,9 @@ + + + diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml index 0c578089bce..e5bdc922f07 100644 --- a/dev-tools/idea/.idea/workspace.xml +++ b/dev-tools/idea/.idea/workspace.xml @@ -235,6 +235,27 @@ + + + + + + + + + + + + - + @@ -281,10 +302,13 @@ - - - - + + + + + + + diff --git a/dev-tools/idea/solr/contrib/solr-morphlines-cell/solr-morphlines-cell.iml b/dev-tools/idea/solr/contrib/solr-morphlines-cell/solr-morphlines-cell.iml new file mode 100644 index 00000000000..e2a4cc3f864 --- /dev/null 
+++ b/dev-tools/idea/solr/contrib/solr-morphlines-cell/solr-morphlines-cell.iml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/solr/contrib/solr-morphlines-core/solr-morphlines-core.iml b/dev-tools/idea/solr/contrib/solr-morphlines-core/solr-morphlines-core.iml new file mode 100644 index 00000000000..4942c8565a8 --- /dev/null +++ b/dev-tools/idea/solr/contrib/solr-morphlines-core/solr-morphlines-core.iml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/idea/solr/contrib/solr-mr/solr-mr.iml b/dev-tools/idea/solr/contrib/solr-mr/solr-mr.iml new file mode 100644 index 00000000000..3e7e3b466e0 --- /dev/null +++ b/dev-tools/idea/solr/contrib/solr-mr/solr-mr.iml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template index b0f9bc8606c..3873b1d59ab 100644 --- a/dev-tools/maven/solr/contrib/pom.xml.template +++ b/dev-tools/maven/solr/contrib/pom.xml.template @@ -37,6 +37,9 @@ dataimporthandler-extras extraction langid + solr-morphlines-cell + solr-morphlines-core + solr-mr uima velocity diff --git a/dev-tools/maven/solr/contrib/solr-morphlines-cell/pom.xml.template b/dev-tools/maven/solr/contrib/solr-morphlines-cell/pom.xml.template new file mode 100644 index 00000000000..ebd13754c25 --- /dev/null +++ b/dev-tools/maven/solr/contrib/solr-morphlines-cell/pom.xml.template @@ -0,0 +1,104 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-morphlines-cell + jar + Apache Solr Cell Morphlines + Apache Solr - Cell Morphlines + + solr/contrib/solr-morphlines-cell + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + + + org.apache.solr + solr-morphlines-core + ${project.version} + test-jar + test + +@solr-morphlines-cell.internal.dependencies@ +@solr-morphlines-cell.external.dependencies@ +@solr-morphlines-cell.internal.test.dependencies@ +@solr-morphlines-cell.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/dev-tools/maven/solr/contrib/solr-morphlines-core/pom.xml.template b/dev-tools/maven/solr/contrib/solr-morphlines-core/pom.xml.template new file mode 100644 index 00000000000..92c785c23e5 --- /dev/null +++ b/dev-tools/maven/solr/contrib/solr-morphlines-core/pom.xml.template @@ -0,0 +1,108 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-morphlines-core + jar + Apache Solr Morphlines Core + Apache Solr - Morphlines Core + + solr/contrib/solr-morphlines-core + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + +@solr-morphlines-core.internal.dependencies@ +@solr-morphlines-core.external.dependencies@ +@solr-morphlines-core.internal.test.dependencies@ +@solr-morphlines-core.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/dev-tools/maven/solr/contrib/solr-mr/pom.xml.template b/dev-tools/maven/solr/contrib/solr-mr/pom.xml.template new file mode 100644 index 00000000000..1683ef2c586 --- /dev/null +++ b/dev-tools/maven/solr/contrib/solr-mr/pom.xml.template @@ -0,0 +1,97 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + @version@ + ../../pom.xml + + org.apache.solr + solr-mr + jar + Apache Solr map-reduce index construction + Apache Solr - map-reduce index construction + + solr/contrib/solr-mr + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + org.apache.lucene + lucene-test-framework + test + + + org.apache.solr + solr-test-framework + test + +@solr-mr.internal.dependencies@ +@solr-mr.external.dependencies@ +@solr-mr.internal.test.dependencies@ +@solr-mr.external.test.dependencies@ + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/dev-tools/maven/solr/pom.xml.template b/dev-tools/maven/solr/pom.xml.template index 7554d6987ff..73ceda700b5 100644 --- a/dev-tools/maven/solr/pom.xml.template +++ b/dev-tools/maven/solr/pom.xml.template @@ -81,6 +81,11 @@ Public online Restlet repository http://maven.restlet.org + + releases.cloudera.com + Cloudera Releases + https://repository.cloudera.com/artifactory/libs-release + diff --git a/lucene/common-build.xml b/lucene/common-build.xml index a99239d135f..938813c536b 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -355,7 +355,7 @@ - @@ -623,6 +623,7 @@ value="The Apache Software Foundation"/> + diff --git a/lucene/ivy-settings.xml b/lucene/ivy-settings.xml index f7b24ad4562..4b8b8c57a5c 100644 --- a/lucene/ivy-settings.xml +++ b/lucene/ivy-settings.xml @@ -33,6 +33,8 @@ + + @@ -53,6 +55,8 @@ + + diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index a71f538dc51..a340d2bbdae 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -2,23 +2,63 @@ # Blank lines, comment lines, and keys that aren't in /org/name format are ignored # when the lexical sort check is performed by the ant 
check-lib-versions target. + +/aopalliance/aopalliance = 1.0 +/asm/asm = 3.1 /cglib/cglib-nodep = 2.2 +/com.adobe.xmp/xmpcore = 5.1.2 com.carrotsearch.randomizedtesting.version = 2.0.13 /com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version} /com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version} /com.carrotsearch/hppc = 0.5.2 + +com.cloudera.cdk.cdk-morphlines.version = 0.8.1 +/com.cloudera.cdk/cdk-morphlines-avro = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-core = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-hadoop-sequencefile = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-json = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-saxon = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-tika-core = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-tika-decompress = ${com.cloudera.cdk.cdk-morphlines.version} +/com.cloudera.cdk/cdk-morphlines-twitter = ${com.cloudera.cdk.cdk-morphlines.version} + +com.codahale.metrics.version = 3.0.1 +/com.codahale.metrics/metrics-core = ${com.codahale.metrics.version} +/com.codahale.metrics/metrics-healthchecks = ${com.codahale.metrics.version} + /com.cybozu.labs/langdetect = 1.1-20120112 /com.drewnoakes/metadata-extractor = 2.6.2 + +com.fasterxml.jackson.core.version = 2.2.3 +/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version} +/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version} +/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version} + /com.google.guava/guava = 14.0.1 + +com.google.inject.guice.version = 3.0 +/com.google.inject.extensions/guice-servlet = ${com.google.inject.guice.version} +/com.google.inject/guice = ${com.google.inject.guice.version} + 
/com.google.protobuf/protobuf-java = 2.5.0 /com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru = 1.2 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 /com.googlecode.mp4parser/isoparser = 1.0-RC-1 /com.ibm.icu/icu4j = 49.1 /com.spatial4j/spatial4j = 0.3 -/com.sun.jersey/jersey-core = 1.16 + +com.sun.jersey.version = 1.8 +/com.sun.jersey.contribs/jersey-guice = ${com.sun.jersey.version} +/com.sun.jersey/jersey-bundle = ${com.sun.jersey.version} +/com.sun.jersey/jersey-core = ${com.sun.jersey.version} +/com.sun.jersey/jersey-json = ${com.sun.jersey.version} +/com.sun.jersey/jersey-server = ${com.sun.jersey.version} + +/com.sun.xml.bind/jaxb-impl = 2.2.2 +/com.thoughtworks.paranamer/paranamer = 2.3 +/com.typesafe/config = 1.0.2 /commons-beanutils/commons-beanutils = 1.7.0 /commons-cli/commons-cli = 1.2 /commons-codec/commons-codec = 1.7 @@ -33,8 +73,10 @@ com.carrotsearch.randomizedtesting.version = 2.0.13 /dom4j/dom4j = 1.6.1 /edu.ucar/netcdf = 4.2-min /hsqldb/hsqldb = 1.8.0.10 +/io.netty/netty = 3.6.2.Final /jakarta-regexp/jakarta-regexp = 1.4 /javax.activation/activation = 1.1 +/javax.inject/javax.inject= 1 /javax.mail/mail = 1.4.1 /javax.servlet/javax.servlet-api = 3.0.1 /javax.servlet/servlet-api = 2.4 @@ -45,9 +87,12 @@ com.carrotsearch.randomizedtesting.version = 2.0.13 /mecab/mecab-ipadic = 2.7.0-20070801 /mecab/mecab-naist-jdic = 0.6.3b-20111013 /net.arnx/jsonic = 1.2.7 +/net.sf.saxon/Saxon-HE = 9.5.1-2 +/net.sourceforge.argparse4j/argparse4j = 0.4.0 /net.sourceforge.nekohtml/nekohtml = 1.9.17 /org.antlr/antlr-runtime = 3.5 /org.apache.ant/ant = 1.8.2 +/org.apache.avro/avro = 1.7.4 /org.apache.commons/commons-compress = 1.4.1 /org.apache.derby/derby = 10.9.1.0 @@ -57,18 +102,35 @@ org.apache.hadoop.version = 2.2.0 /org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version} /org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-app = ${org.apache.hadoop.version} 
+/org.apache.hadoop/hadoop-mapreduce-client-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-core = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-hs = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-jobclient = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-mapreduce-client-shuffle = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-api = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-client = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-common = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-nodemanager = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-resourcemanager = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-tests = ${org.apache.hadoop.version} +/org.apache.hadoop/hadoop-yarn-server-web-proxy = ${org.apache.hadoop.version} + # The httpcore version is often different from the httpclient and httpmime versions, # so the httpcore version value should not share the same symbolic name with them. 
/org.apache.httpcomponents/httpclient = 4.2.6 /org.apache.httpcomponents/httpcore = 4.2.5 /org.apache.httpcomponents/httpmime = 4.2.6 -org.apache.james.apache.mime4j = 0.7.2 -/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j} -/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j} +org.apache.james.apache.mime4j.version = 0.7.2 +/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version} +/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version} /org.apache.mahout/mahout-collections = 1.0 /org.apache.mahout/mahout-math = 0.6 +/org.apache.mrunit/mrunit = 1.0.0 org.apache.pdfbox.version = 1.8.1 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version} @@ -84,6 +146,7 @@ org.apache.poi.version = 3.9 org.apache.tika.version = 1.4 /org.apache.tika/tika-core = ${org.apache.tika.version} /org.apache.tika/tika-parsers = ${org.apache.tika.version} +/org.apache.tika/tika-xmp = ${org.apache.tika.version} org.apache.uima.version = 2.3.1 /org.apache.uima/AlchemyAPIAnnotator = ${org.apache.uima.version} @@ -96,6 +159,7 @@ org.apache.uima.version = 2.3.1 /org.apache.velocity/velocity-tools = 2.0 /org.apache.xmlbeans/xmlbeans = 2.3.0 /org.apache.zookeeper/zookeeper = 3.4.5 +/org.aspectj/aspectjrt = 1.6.11 org.bouncycastle.version = 1.45 /org.bouncycastle/bcmail-jdk15 = ${org.bouncycastle.version} @@ -111,8 +175,9 @@ org.carrot2.morfologik.version = 1.7.1 /org.ccil.cowan.tagsoup/tagsoup = 1.2.1 -org.codehaus.jackson.version = 1.7.4 +org.codehaus.jackson.version = 1.9.13 /org.codehaus.jackson/jackson-core-asl = ${org.codehaus.jackson.version} +/org.codehaus.jackson/jackson-jaxrs = ${org.codehaus.jackson.version} /org.codehaus.jackson/jackson-mapper-asl = ${org.codehaus.jackson.version} /org.codehaus.woodstox/wstx-asl = 3.2.7 @@ -137,6 +202,8 @@ org.gagravarr.vorbis.java.version = 0.1 /org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version} /org.gagravarr/vorbis-java-tika = 
${org.gagravarr.vorbis.java.version} +/org.mockito/mockito-core = 1.9.5 + org.mortbay.jetty.version = 6.1.26 /org.mortbay.jetty/jetty = ${org.mortbay.jetty.version} /org.mortbay.jetty/jetty-util = ${org.mortbay.jetty.version} @@ -161,5 +228,6 @@ org.slf4j.version = 1.6.6 /org.slf4j/slf4j-log4j12 = ${org.slf4j.version} /org.tukaani/xz = 1.0 +/org.xerial.snappy/snappy-java = 1.0.4.1 /rome/rome = 0.9 /xerces/xercesImpl = 2.9.1 diff --git a/lucene/tools/custom-tasks.xml b/lucene/tools/custom-tasks.xml index e17480b1d3c..e38b0b137a4 100644 --- a/lucene/tools/custom-tasks.xml +++ b/lucene/tools/custom-tasks.xml @@ -45,7 +45,7 @@ - + diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy index 0933cab39bf..b1c4311e3b0 100644 --- a/lucene/tools/junit4/tests.policy +++ b/lucene/tools/junit4/tests.policy @@ -63,6 +63,7 @@ grant { permission javax.security.auth.PrivateCredentialPermission "org.apache.hadoop.security.Credentials * \"*\"", "read"; permission java.security.SecurityPermission "putProviderProperty.SaslPlainServer"; permission java.security.SecurityPermission "insertProvider.SaslPlainServer"; + permission javax.xml.bind.JAXBPermission "setDatatypeConverter"; // TIKA uses BouncyCastle and that registers new provider for PDF parsing + MSOffice parsing. Maybe report as bug! permission java.security.SecurityPermission "putProviderProperty.BC"; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9189b01c1d7..1e5f40d38a9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -44,6 +44,15 @@ Upgrading from Solr 4.x Detailed Change List ---------------------- +New Features +---------------------- + +* SOLR-1301: Add a Solr contrib that allows for building Solr indexes via + Hadoop's MapReduce. 
(Matt Revelle, Alexander Kanarsky, Steve Rowe, + Mark Miller, Greg Bowyer, Jason Rutherglen, Kris Jirapinyo, Jason Venner, + Andrzej Bialecki, Patrick Hunt, Wolfgang Hoschek, Roman Shaposhnik, + Eric Wong) + Other Changes ---------------------- diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml index 40e5201f60d..263c48832c9 100644 --- a/solr/contrib/extraction/ivy.xml +++ b/solr/contrib/extraction/ivy.xml @@ -22,6 +22,7 @@ + @@ -44,12 +45,19 @@ + + + + + + + diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java index c91dd47306f..acf94a2d801 100644 --- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java +++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandlerFactory.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. diff --git a/solr/contrib/solr-morphlines-cell/build.xml b/solr/contrib/solr-morphlines-cell/build.xml new file mode 100644 index 00000000000..e0da709634a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/build.xml @@ -0,0 +1,143 @@ + + + + + + + + Solr Cell Morphline commands. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/ivy.xml b/solr/contrib/solr-morphlines-cell/ivy.xml new file mode 100644 index 00000000000..ee652bd8363 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/ivy.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java new file mode 100644 index 00000000000..8d5873fe4e3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java @@ -0,0 +1,344 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.cell; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.DateUtil; +import org.apache.solr.handler.extraction.ExtractingParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.morphlines.solr.SolrLocator; +import org.apache.solr.schema.IndexSchema; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.TeeContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.sax.xpath.Matcher; +import org.apache.tika.sax.xpath.MatchingContentHandler; +import org.apache.tika.sax.xpath.XPathParser; +import org.apache.xml.serialize.OutputFormat; +import org.apache.xml.serialize.XMLSerializer; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Fields; +import 
com.cloudera.cdk.morphline.stdio.AbstractParser; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; +import com.google.common.io.Closeables; +import com.typesafe.config.Config; + +/** + * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps + * the Tika output back to a record using SolrCell. + *

+ * The Tika parser is chosen from the configurable list of parsers, depending on the MIME type + * specified in the input record. Typically, this requires an upstream DetectMimeTypeBuilder + * in a prior command. + */ +public final class SolrCellBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("solrCell"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new SolrCell(config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class SolrCell extends AbstractParser { + + private final IndexSchema schema; + private final List dateFormats; + private final String xpathExpr; + private final List parsers = new ArrayList(); + private final SolrContentHandlerFactory solrContentHandlerFactory; + + private final SolrParams solrParams; + private final Map mediaTypeToParserMap; + + private static final XPathParser PARSER = new XPathParser("xhtml", XHTMLContentHandler.XHTML); + + public static final String ADDITIONAL_SUPPORTED_MIME_TYPES = "additionalSupportedMimeTypes"; + + public SolrCell(Config config, Command parent, Command child, MorphlineContext context) { + super(config, parent, child, context); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.schema = locator.getIndexSchema(); + Preconditions.checkNotNull(schema); + LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values())); + + ListMultimap cellParams = ArrayListMultimap.create(); + String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null); + if (uprefix != null) { + 
cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix); + } + for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.EMPTY_LIST)) { + cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture); + } + Config fmapConfig = getConfigs().getConfig(config, "fmap", null); + if (fmapConfig != null) { + for (Map.Entry entry : fmapConfig.root().unwrapped().entrySet()) { + cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString()); + } + } + String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null); + if (captureAttributes != null) { + cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes); + } + String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null); + if (lowerNames != null) { + cellParams.put(ExtractingParams.LOWERNAMES, lowerNames); + } + String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null); + if (defaultField != null) { + cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField); + } + xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null); + if (xpathExpr != null) { + cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr); + } + + this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList(DateUtil.DEFAULT_DATE_FORMATS)); + + String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName()); + Class factoryClass; + try { + factoryClass = (Class)Class.forName(handlerStr); + } catch (ClassNotFoundException cnfe) { + throw new MorphlineCompilationException("Could not find class " + + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe); + } + this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config); + + this.mediaTypeToParserMap = new HashMap(); + //MimeTypes mimeTypes = 
MimeTypes.getDefaultMimeTypes(); // FIXME getMediaTypeRegistry.normalize() + + List parserConfigs = getConfigs().getConfigList(config, "parsers"); + for (Config parserConfig : parserConfigs) { + String parserClassName = getConfigs().getString(parserConfig, "parser"); + + Object obj; + try { + obj = Class.forName(parserClassName).newInstance(); + } catch (Throwable e) { + throw new MorphlineCompilationException("Cannot instantiate Tika parser: " + parserClassName, config, e); + } + if (!(obj instanceof Parser)) { + throw new MorphlineCompilationException("Tika parser " + obj.getClass().getName() + + " must be an instance of class " + Parser.class.getName(), config); + } + Parser parser = (Parser) obj; + this.parsers.add(parser); + + List mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.EMPTY_LIST); + for (String mediaTypeStr : mediaTypes) { + MediaType mediaType = parseMediaType(mediaTypeStr); + addSupportedMimeType(mediaTypeStr); + this.mediaTypeToParserMap.put(mediaType, parser); + } + + if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) { + for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) { + mediaType = mediaType.getBaseType(); + addSupportedMimeType(mediaType.toString()); + this.mediaTypeToParserMap.put(mediaType, parser); + } + List extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.EMPTY_LIST); + for (String mediaTypeStr : extras) { + MediaType mediaType = parseMediaType(mediaTypeStr); + addSupportedMimeType(mediaTypeStr); + this.mediaTypeToParserMap.put(mediaType, parser); + } + } + } + //LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap); + + Map tmp = new HashMap(); + for (Map.Entry> entry : cellParams.asMap().entrySet()) { + tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()])); + } + this.solrParams = new MultiMapSolrParams(tmp); + validateArguments(); + } + + @Override + protected boolean doProcess(Record record, 
InputStream inputStream) { + Parser parser = detectParser(record); + if (parser == null) { + return false; + } + + ParseContext parseContext = new ParseContext(); + + // necessary for gzipped files or tar files, etc! copied from TikaCLI + parseContext.set(Parser.class, parser); + + Metadata metadata = new Metadata(); + for (Entry entry : record.getFields().entries()) { + metadata.add(entry.getKey(), entry.getValue().toString()); + } + + SolrContentHandler handler = solrContentHandlerFactory.createSolrContentHandler(metadata, solrParams, schema); + + try { + inputStream = TikaInputStream.get(inputStream); + + ContentHandler parsingHandler = handler; + StringWriter debugWriter = null; + if (LOG.isTraceEnabled()) { + debugWriter = new StringWriter(); + ContentHandler serializer = new XMLSerializer(debugWriter, new OutputFormat("XML", "UTF-8", true)); + parsingHandler = new TeeContentHandler(parsingHandler, serializer); + } + + // String xpathExpr = "/xhtml:html/xhtml:body/xhtml:div/descendant:node()"; + if (xpathExpr != null) { + Matcher matcher = PARSER.parse(xpathExpr); + parsingHandler = new MatchingContentHandler(parsingHandler, matcher); + } + + try { + parser.parse(inputStream, parsingHandler, metadata, parseContext); + } catch (IOException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } catch (SAXException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } catch (TikaException e) { + throw new MorphlineRuntimeException("Cannot parse", e); + } + + LOG.trace("debug XML doc: {}", debugWriter); + } finally { + if (inputStream != null) { + Closeables.closeQuietly(inputStream); + } + } + + SolrInputDocument doc = handler.newDocument(); + LOG.debug("solr doc: {}", doc); + Record outputRecord = toRecord(doc); + return getChild().process(outputRecord); + } + + private Parser detectParser(Record record) { + if (!hasAtLeastOneMimeType(record)) { + return null; + } + String mediaTypeStr = (String) 
record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE); + assert mediaTypeStr != null; + + MediaType mediaType = parseMediaType(mediaTypeStr).getBaseType(); + Parser parser = mediaTypeToParserMap.get(mediaType); // fast path + if (parser != null) { + return parser; + } + // wildcard matching + for (Map.Entry entry : mediaTypeToParserMap.entrySet()) { + if (isMediaTypeMatch(mediaType, entry.getKey())) { + return entry.getValue(); + } + } + if (LOG.isDebugEnabled()) { + LOG.debug("No supported MIME type parser found for " + Fields.ATTACHMENT_MIME_TYPE + "=" + mediaTypeStr); + } + return null; + } + + private boolean hasAtLeastOneMimeType(Record record) { + if (!record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE)) { + LOG.debug("Command failed because of missing MIME type for record: {}", record); + return false; + } + return true; + } + + private MediaType parseMediaType(String mediaTypeStr) { + MediaType mediaType = MediaType.parse(mediaTypeStr.trim().toLowerCase(Locale.ROOT)); + return mediaType.getBaseType(); + }; + + /** Returns true if mediaType falls within the given range (pattern), false otherwise */ + private boolean isMediaTypeMatch(MediaType mediaType, MediaType rangePattern) { + String WILDCARD = "*"; + String rangePatternType = rangePattern.getType(); + String rangePatternSubtype = rangePattern.getSubtype(); + return (rangePatternType.equals(WILDCARD) || rangePatternType.equals(mediaType.getType())) + && (rangePatternSubtype.equals(WILDCARD) || rangePatternSubtype.equals(mediaType.getSubtype())); + } + + private static SolrContentHandlerFactory getSolrContentHandlerFactory( + Class factoryClass, Collection dateFormats, Config config) { + try { + return factoryClass.getConstructor(Collection.class).newInstance(dateFormats); + } catch (NoSuchMethodException nsme) { + throw new MorphlineCompilationException("Unable to find valid constructor of type " + + factoryClass.getName() + " for creating SolrContentHandler", 
config, nsme); + } catch (Exception e) { + throw new MorphlineCompilationException("Unexpected exception when trying to create SolrContentHandlerFactory of type " + + factoryClass.getName(), config, e); + } + } + + private Record toRecord(SolrInputDocument doc) { + Record record = new Record(); + for (Entry entry : doc.entrySet()) { + record.getFields().putAll(entry.getKey(), entry.getValue().getValues()); + } + return record; + } + + } + +} diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java new file mode 100644 index 00000000000..81f49afd4e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/StripNonCharSolrContentHandlerFactory.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.cell; + +import java.util.Collection; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.tika.metadata.Metadata; + +/** + * {@link SolrContentHandler} and associated factory that strips non-characters and trims on output. + * This prevents exceptions on parsing integer fields inside Solr server. + */ +public class StripNonCharSolrContentHandlerFactory extends SolrContentHandlerFactory { + + public StripNonCharSolrContentHandlerFactory(Collection dateFormats) { + super(dateFormats); + } + + @Override + public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + return new StripNonCharSolrContentHandler(metadata, params, schema, dateFormats); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class StripNonCharSolrContentHandler extends SolrContentHandler { + + public StripNonCharSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) { + super(metadata, params, schema, dateFormats); + } + + /** + * Strips all Unicode noncharacters (U+FDD0..U+FDEF and every code point ending in FFFE or FFFF, in any plane) as well as control characters below U+0020 other than tab, line feed and carriage return; these can cause SolrReducer problems if present. + * Adapted from Apache Nutch, but iterates by code point so that supplementary-plane noncharacters are recognized too; returns a new string and never modifies the input. 
+ */ + private static String stripNonCharCodepoints(String input) { + StringBuilder stripped = new StringBuilder(input.length()); + int cp; + for (int i = 0; i < input.length(); i += Character.charCount(cp)) { + cp = input.codePointAt(i); + // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:] + // and non-printable control characters except tabulator, new line and carriage return + if (cp % 0x10000 != 0xffff && // U+FFFF, U+1FFFF, ... U+10FFFF (step 0x10000) + cp % 0x10000 != 0xfffe && // U+FFFE, U+1FFFE, ... U+10FFFE (step 0x10000) + (cp < 0xfdd0 || cp > 0xfdef) && // U+FDD0..U+FDEF, both ends inclusive + (cp > 0x1F || cp == 0x9 || cp == 0xa || cp == 0xd)) { + stripped.appendCodePoint(cp); + } + } + return stripped.toString(); + } + + @Override + protected String transformValue(String val, SchemaField schemaField) { + String ret = super.transformValue(val, schemaField); + ret = stripNonCharCodepoints(ret).trim(); // strip first, trim last: removing a leading/trailing noncharacter may expose whitespace + return ret; + } + } +} diff --git a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java new file mode 100644 index 00000000000..6e7df593ff8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/TrimSolrContentHandlerFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.cell; + +import java.util.Collection; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.handler.extraction.SolrContentHandlerFactory; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.tika.metadata.Metadata; + +/** + * Factory for a {@link SolrContentHandler} whose transformValue applies {@link String#trim()} to every value returned by the parent handler, so field values are trimmed on output. + * This prevents exceptions on parsing integer fields inside Solr server. + */ +public class TrimSolrContentHandlerFactory extends SolrContentHandlerFactory { + + public TrimSolrContentHandlerFactory(Collection dateFormats) { + super(dateFormats); + } + + @Override + public SolrContentHandler createSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) { + return new TrimSolrContentHandler(metadata, params, schema, dateFormats); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: the handler type instantiated by createSolrContentHandler + /////////////////////////////////////////////////////////////////////////////// + private static final class TrimSolrContentHandler extends SolrContentHandler { + + public TrimSolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema, Collection dateFormats) { + super(metadata, params, schema, dateFormats); + } + + @Override + protected String transformValue(String val, SchemaField schemaField) { + return super.transformValue(val, schemaField).trim(); + } + } +} diff --git 
a/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html new file mode 100644 index 00000000000..9d5daec89bb --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/java/org/apache/solr/morphlines/cell/package.html @@ -0,0 +1,22 @@ + + + + +Morphlines Solr Cell related code. + + diff --git a/solr/contrib/solr-morphlines-cell/src/java/overview.html b/solr/contrib/solr-morphlines-cell/src/java/overview.html new file mode 100644 index 00000000000..3e25367d302 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr Cell Morphline Commands + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of 
Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the Dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches one of those defined in this +# file is removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building your own stoptag set. +# +# The entire possible tagset is provided below for convenience. 
+# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. 
インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. 
+#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. 
This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 
ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. 
ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. 
[)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de 
+del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový 
+tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي 
+انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions 
+fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt @@ 
-0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under 
the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 
@@ +# example set of Armenian stopwords. +Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah 
+ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git 
a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gl' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + il +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them
+le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo 
+stessero +stando diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s 
+bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. 
of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs
* +deim | them * +di | your (fem.) * +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end.
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | she +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare.
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From
svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er |
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt new file 
mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml new file mode 100644 index 00000000000..ae2c56d18ae --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/schema.xml @@ -0,0 +1,947 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 
1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + 
+ + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git 
a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the 
dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 
通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. 
ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. 
ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. 
ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. 
ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. 
ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. 
+#未知語 +# +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels 
+des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může 
+strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي +انكه 
+نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez 
+fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a 
+aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian 
stopwords. +Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah 
+inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed 
| and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git 
a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet 
+tikt +tiku +tiki +tika +tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. 
of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. 
Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This 
file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | 
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt new file mode 100644 
index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml new file mode 100644 index 00000000000..65192efe442 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml new file mode 100644 index 00000000000..beff1b2af0a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/solrconfig.xml @@ -0,0 +1,1784 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + 
${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + 
+ + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/minimr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer 
and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. 
+# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 
0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. 
+#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. 
(楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. 
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. 
ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. 
[)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels 
+des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může 
+strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي +انكه 
+نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez 
+fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a 
+aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian 
stopwords. +Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah 
+inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed 
| and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git 
a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet 
+tikt +tiku +tiki +tika +tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. 
of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. 
Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This 
file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | 
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt new file mode 100644 
index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml new file mode 100644 index 00000000000..b133c135f31 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml new file mode 100644 index 00000000000..f9683b27db7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/solrconfig.xml @@ -0,0 +1,1789 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + 
${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + 
+ + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/mrunit/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt 
new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the 
analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. 
Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. 
Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). 
+# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 
後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 
ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. 
ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. 
[)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com 
+contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme 
+pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null 
+++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر 
+بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons 
+soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- 
/dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. 
When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt @@ -0,0 
+1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 
00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. +Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri 
+dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi 
+setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | 
+nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno 
+starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi 
+esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. 
of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. 
+ | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * 
+deira | them * +deires | theirs * +deim | them * +di | your (fem.) * +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. 
+# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | 
thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt 
b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git 
a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. 
+# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+dontstems +zwhacky + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml new file mode 100644 index 00000000000..83080dfa40c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/schema.xml @@ -0,0 +1,914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 
keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml new file mode 100644 index 00000000000..a37ab12ecfe --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test-files/solr/solrcloud/conf/solrconfig.xml @@ -0,0 +1,1787 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + 
${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + 
name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java b/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java new file mode 100644 index 00000000000..80d2d43499c --- /dev/null +++ b/solr/contrib/solr-morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java @@ -0,0 +1,208 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.cell; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.util.DateUtil; +import org.apache.solr.handler.extraction.SolrContentHandler; +import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase; +import org.apache.solr.schema.IndexSchema; +import org.apache.tika.metadata.Metadata; +import org.junit.Before; +import org.junit.Test; + + +public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase { + + private Map expectedRecords = new HashMap(); + + + @Before + public void setUp() throws Exception { + super.setUp(); + String path = RESOURCES_DIR + "/test-documents"; + expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2); + expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2); + expectedRecords.put(path + "/cars.csv", 5); + expectedRecords.put(path + "/cars.csv.gz", 5); + expectedRecords.put(path + "/cars.tar.gz", 4); + expectedRecords.put(path + "/cars.tsv", 5); + expectedRecords.put(path + "/cars.ssv", 5); + expectedRecords.put(path + "/test-documents.7z", 9); + expectedRecords.put(path + "/test-documents.cpio", 9); + expectedRecords.put(path + "/test-documents.tar", 9); + expectedRecords.put(path + 
"/test-documents.tbz2", 9); + expectedRecords.put(path + "/test-documents.tgz", 9); + expectedRecords.put(path + "/test-documents.zip", 9); + expectedRecords.put(path + "/multiline-stacktrace.log", 4); + + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + } + + @Test + public void testSolrCellJPGCompressed() throws Exception { + + morphline = createMorphline("test-morphlines/solrCellJPGCompressed"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testJPEG_EXIF.jpg", + path + "/testJPEG_EXIF.jpg.gz", + path + "/testJPEG_EXIF.jpg.tar.gz", + //path + "/jpeg2000.jp2", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellXML() throws Exception { + morphline = createMorphline("test-morphlines/solrCellXML"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testXML2.xml", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellDocumentTypes() throws Exception { + + morphline = createMorphline("test-morphlines/solrCellDocumentTypes"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testBMPfp.txt", + path + "/boilerplate.html", + path + "/NullHeader.docx", + path + "/testWORD_various.doc", + path + "/testPDF.pdf", + path + "/testJPEG_EXIF.jpg", + path + "/testJPEG_EXIF.jpg.gz", + path + "/testJPEG_EXIF.jpg.tar.gz", + path + "/testXML.xml", +// path + "/cars.csv", +// path + "/cars.tsv", +// path + "/cars.ssv", +// path + "/cars.csv.gz", +// path + "/cars.tar.gz", + path + "/sample-statuses-20120906-141433.avro", + path + "/sample-statuses-20120906-141433", + path + "/sample-statuses-20120906-141433.gz", + path + "/sample-statuses-20120906-141433.bz2", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + @Test + public void testSolrCellDocumentTypes2() throws Exception { 
+ morphline = createMorphline("test-morphlines/solrCellDocumentTypes"); + String path = RESOURCES_DIR + "/test-documents"; + String[] files = new String[] { + path + "/testPPT_various.ppt", + path + "/testPPT_various.pptx", + path + "/testEXCEL.xlsx", + path + "/testEXCEL.xls", + path + "/testPages.pages", + //path + "/testNumbers.numbers", + //path + "/testKeynote.key", + + path + "/testRTFVarious.rtf", + path + "/complex.mbox", + path + "/test-outlook.msg", + path + "/testEMLX.emlx", +// path + "/testRFC822", + path + "/rsstest.rss", +// path + "/testDITA.dita", + + path + "/testMP3i18n.mp3", + path + "/testAIFF.aif", + path + "/testFLAC.flac", +// path + "/testFLAC.oga", +// path + "/testVORBIS.ogg", + path + "/testMP4.m4a", + path + "/testWAV.wav", +// path + "/testWMA.wma", + + path + "/testFLV.flv", +// path + "/testWMV.wmv", + + path + "/testBMP.bmp", + path + "/testPNG.png", + path + "/testPSD.psd", + path + "/testSVG.svg", + path + "/testTIFF.tif", + +// path + "/test-documents.7z", +// path + "/test-documents.cpio", +// path + "/test-documents.tar", +// path + "/test-documents.tbz2", +// path + "/test-documents.tgz", +// path + "/test-documents.zip", +// path + "/test-zip-of-zip.zip", +// path + "/testJAR.jar", + +// path + "/testKML.kml", +// path + "/testRDF.rdf", + path + "/testVISIO.vsd", +// path + "/testWAR.war", +// path + "/testWindows-x86-32.exe", +// path + "/testWINMAIL.dat", +// path + "/testWMF.wmf", + }; + testDocumentTypesInternal(files, expectedRecords); + } + + /** + * Test that the ContentHandler properly strips the illegal characters + */ + @Test + public void testTransformValue() { + String fieldName = "user_name"; + assertFalse("foobar".equals(getFoobarWithNonChars())); + + Metadata metadata = new Metadata(); + // load illegal char string into a metadata field and generate a new document, + // which will cause the ContentHandler to be invoked. 
+ metadata.set(fieldName, getFoobarWithNonChars()); + StripNonCharSolrContentHandlerFactory contentHandlerFactory = + new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS); + IndexSchema schema = h.getCore().getLatestSchema(); + SolrContentHandler contentHandler = + contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema); + SolrInputDocument doc = contentHandler.newDocument(); + String foobar = doc.getFieldValue(fieldName).toString(); + assertTrue("foobar".equals(foobar)); + } + + /** + * Returns string "foobar" with illegal characters interspersed. + */ + private String getFoobarWithNonChars() { + char illegalChar = '\uffff'; + StringBuilder builder = new StringBuilder(); + builder.append(illegalChar).append(illegalChar).append("foo").append(illegalChar) + .append(illegalChar).append("bar").append(illegalChar).append(illegalChar); + return builder.toString(); + } + +} diff --git a/solr/contrib/solr-morphlines-core/build.xml b/solr/contrib/solr-morphlines-core/build.xml new file mode 100644 index 00000000000..ad11be1226c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/build.xml @@ -0,0 +1,107 @@ + + + + + + + + Solr Morphlines commands. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/ivy.xml b/solr/contrib/solr-morphlines-core/ivy.xml new file mode 100644 index 00000000000..290460d27cb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/ivy.xml @@ -0,0 +1,122 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java new file mode 100644 index 00000000000..f3030247065 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/DocumentLoader.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; + +/** + * A vehicle to load a list of Solr documents into some kind of destination, + * such as a SolrServer or MapReduce RecordWriter. + */ +public interface DocumentLoader { + + /** Begins a transaction */ + public void beginTransaction() throws IOException, SolrServerException; + + /** Loads the given document into the destination */ + public void load(SolrInputDocument doc) throws IOException, SolrServerException; + + /** + * Sends any outstanding documents to the destination and waits for a positive + * or negative ack (i.e. exception). Depending on the outcome the caller + * should then commit or rollback the current flume transaction + * correspondingly. + * + * @throws IOException + * If there is a low-level I/O error. + */ + public void commitTransaction() throws IOException, SolrServerException; + + /** + * Performs a rollback of all non-committed documents pending. + *

+ * Note that this is not a true rollback as in databases. Content you have + * previously added may have already been committed due to autoCommit, buffer + * full, other client performing a commit etc. So this is only a best-effort + * rollback. + * + * @throws IOException + * If there is a low-level I/O error. + */ + public UpdateResponse rollbackTransaction() throws IOException, SolrServerException; + + /** Releases allocated resources */ + public void shutdown() throws IOException, SolrServerException; + + /** + * Issues a ping request to check if the server is alive + * + * @throws IOException + * If there is a low-level I/O error. + */ + public SolrPingResponse ping() throws IOException, SolrServerException; + +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java new file mode 100644 index 00000000000..251d016634c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/GenerateSolrSequenceKeyBuilder.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.security.SecureRandom; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; +import com.typesafe.config.Config; + +/** + * A command that assigns a record unique key that is the concatenation of the given + * baseIdField record field, followed by a running count of the record number within + * the current session. The count is reset to zero whenever a "startSession" notification is + * received. + *

+ * For example, assume a CSV file containing multiple records but no unique ids, and the + * baseIdField field is the filesystem path of the file. Now this command can be used + * to assign the following record values to Solr's unique key field: + * $path#0, $path#1, ... $path#N. + *

+ * The name of the unique key field is fetched from Solr's schema.xml file, as directed by the + * solrLocator configuration parameter. + */ +public final class GenerateSolrSequenceKeyBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Arrays.asList( + "generateSolrSequenceKey", + "sanitizeUniqueSolrKey" // old name (retained for backwards compatibility) + ); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new GenerateSolrSequenceKey(config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class GenerateSolrSequenceKey extends AbstractCommand { + + private final boolean preserveExisting; + private final String baseIdFieldName; + private final String uniqueKeyName; + private long recordCounter = 0; + + private final String idPrefix; // for load testing only; enables adding same document many times with a different unique key + private final Random randomIdPrefix; // for load testing only; enables adding same document many times with a different unique key + + public GenerateSolrSequenceKey(Config config, Command parent, Command child, MorphlineContext context) { + super(config, parent, child, context); + this.baseIdFieldName = getConfigs().getString(config, "baseIdField", Fields.BASE_ID); + this.preserveExisting = getConfigs().getBoolean(config, "preserveExisting", true); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + IndexSchema schema = locator.getIndexSchema(); + SchemaField uniqueKey = schema.getUniqueKeyField(); + uniqueKeyName = uniqueKey == null ? 
null : uniqueKey.getName(); + + String tmpIdPrefix = getConfigs().getString(config, "idPrefix", null); // for load testing only + Random tmpRandomIdPrefx = null; + if ("random".equals(tmpIdPrefix)) { // for load testing only + tmpRandomIdPrefx = new Random(new SecureRandom().nextLong()); + tmpIdPrefix = null; + } + idPrefix = tmpIdPrefix; + randomIdPrefix = tmpRandomIdPrefx; + validateArguments(); + } + + @Override + protected boolean doProcess(Record doc) { + long num = recordCounter++; + // LOG.debug("record #{} id before sanitizing doc: {}", num, doc); + if (uniqueKeyName == null || (preserveExisting && doc.getFields().containsKey(uniqueKeyName))) { + ; // we must preserve the existing id + } else { + Object baseId = doc.getFirstValue(baseIdFieldName); + if (baseId == null) { + throw new MorphlineRuntimeException("Record field " + baseIdFieldName + + " must not be null as it is needed as a basis for a unique key for solr doc: " + doc); + } + doc.replaceValues(uniqueKeyName, baseId.toString() + "#" + num); + } + + // for load testing only; enables adding same document many times with a different unique key + if (idPrefix != null) { + String id = doc.getFirstValue(uniqueKeyName).toString(); + id = idPrefix + id; + doc.replaceValues(uniqueKeyName, id); + } else if (randomIdPrefix != null) { + String id = doc.getFirstValue(uniqueKeyName).toString(); + id = String.valueOf(Math.abs(randomIdPrefix.nextInt())) + "#" + id; + doc.replaceValues(uniqueKeyName, id); + } + + LOG.debug("record #{} unique key sanitized to this: {}", num, doc); + + return super.doProcess(doc); + } + + @Override + protected void doNotify(Record notification) { + if (Notifications.containsLifecycleEvent(notification, Notifications.LifecycleEvent.START_SESSION)) { + recordCounter = 0; // reset + } + super.doNotify(notification); + } + + } +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java 
b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java new file mode 100644 index 00000000000..019dfcf0f52 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/LoadSolrBuilder.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrInputDocument; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.cloudera.cdk.morphline.base.Metrics; +import com.cloudera.cdk.morphline.base.Notifications; +import com.codahale.metrics.Timer; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + +/** + * A command that loads a record into a SolrServer or MapReduce SolrOutputFormat. + */ +public final class LoadSolrBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("loadSolr"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new LoadSolr(config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class LoadSolr extends AbstractCommand { + + private final DocumentLoader loader; + private final Map boosts = new HashMap(); + private final Timer elapsedTime; + + public LoadSolr(Config config, Command parent, Command child, MorphlineContext context) { + super(config, parent, child, context); + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.loader = 
locator.getLoader(); + Config boostsConfig = getConfigs().getConfig(config, "boosts", ConfigFactory.empty()); + for (Map.Entry entry : boostsConfig.root().unwrapped().entrySet()) { + String fieldName = entry.getKey(); + float boost = Float.parseFloat(entry.getValue().toString().trim()); + boosts.put(fieldName, boost); + } + validateArguments(); + this.elapsedTime = getTimer(Metrics.ELAPSED_TIME); + } + + @Override + protected void doNotify(Record notification) { + for (Object event : Notifications.getLifecycleEvents(notification)) { + if (event == Notifications.LifecycleEvent.BEGIN_TRANSACTION) { + try { + loader.beginTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } else if (event == Notifications.LifecycleEvent.COMMIT_TRANSACTION) { + try { + loader.commitTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + else if (event == Notifications.LifecycleEvent.ROLLBACK_TRANSACTION) { + try { + loader.rollbackTransaction(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + else if (event == Notifications.LifecycleEvent.SHUTDOWN) { + try { + loader.shutdown(); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + } + } + super.doNotify(notification); + } + + @Override + protected boolean doProcess(Record record) { + Timer.Context timerContext = elapsedTime.time(); + SolrInputDocument doc = convert(record); + try { + loader.load(doc); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } catch (SolrServerException e) { + throw new MorphlineRuntimeException(e); + } finally { + timerContext.stop(); + } + + // pass 
record to next command in chain: + return super.doProcess(record); + } + + private SolrInputDocument convert(Record record) { + Map> map = record.getFields().asMap(); + SolrInputDocument doc = new SolrInputDocument(new HashMap(2 * map.size())); + for (Map.Entry> entry : map.entrySet()) { + String key = entry.getKey(); + doc.setField(key, entry.getValue(), getBoost(key)); + } + return doc; + } + + private float getBoost(String key) { + if (boosts.size() > 0) { + Float boost = boosts.get(key); + if (boost != null) { + return boost.floatValue(); + } + } + return 1.0f; + } + + } +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java new file mode 100644 index 00000000000..f98eeb25016 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SafeConcurrentUpdateSolrServer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import org.apache.http.client.HttpClient; +import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * ConcurrentUpdateSolrServer that propagates exceptions up to the submitter of + * requests on blockUntilFinished() + */ +final class SafeConcurrentUpdateSolrServer extends ConcurrentUpdateSolrServer { + + private Throwable currentException = null; + private final Object myLock = new Object(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SafeConcurrentUpdateSolrServer.class); + + public SafeConcurrentUpdateSolrServer(String solrServerUrl, int queueSize, int threadCount) { + this(solrServerUrl, null, queueSize, threadCount); + } + + public SafeConcurrentUpdateSolrServer(String solrServerUrl, HttpClient client, int queueSize, int threadCount) { + super(solrServerUrl, client, queueSize, threadCount); + } + + @Override + public void handleError(Throwable ex) { + assert ex != null; + synchronized (myLock) { + currentException = ex; + } + LOGGER.error("handleError", ex); + } + + @Override + public void blockUntilFinished() { + super.blockUntilFinished(); + synchronized (myLock) { + if (currentException != null) { + throw new RuntimeException(currentException); + } + } + } + + public void clearException() { + synchronized (myLock) { + currentException = null; + } + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java new file mode 100644 index 00000000000..fbc8de21bda --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SanitizeUnknownSolrFieldsBuilder.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.solr.schema.IndexSchema; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; + +/** + * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them + * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with + * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields). + *

+ * Recall that Solr throws an exception on any attempt to load a document that contains a field that + * isn't specified in schema.xml. + */ +public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("sanitizeUnknownSolrFields"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new SanitizeUnknownSolrFields(config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class SanitizeUnknownSolrFields extends AbstractCommand { + + private final IndexSchema schema; + private final String renameToPrefix; + + public SanitizeUnknownSolrFields(Config config, Command parent, Command child, MorphlineContext context) { + super(config, parent, child, context); + + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + this.schema = locator.getIndexSchema(); + Preconditions.checkNotNull(schema); + LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values())); + + String str = getConfigs().getString(config, "renameToPrefix", "").trim(); + this.renameToPrefix = str.length() > 0 ? 
str : null; + validateArguments(); + } + + @Override + protected boolean doProcess(Record record) { + Collection entries = new ArrayList(record.getFields().asMap().entrySet()); + for (Map.Entry> entry : entries) { + String key = entry.getKey(); + if (schema.getFieldOrNull(key) == null) { + LOG.debug("Sanitizing unknown Solr field: {}", key); + Collection values = entry.getValue(); + if (renameToPrefix != null) { + record.getFields().putAll(renameToPrefix + key, values); + } + values.clear(); // implicitly removes key from record + } + } + return super.doProcess(record); + } + + } +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java new file mode 100644 index 00000000000..2381a08b082 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; + +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.util.SystemIdResolver; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.base.Configs; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; +import com.typesafe.config.ConfigRenderOptions; +import com.typesafe.config.ConfigUtil; + +/** + * Set of configuration parameters that identify the location and schema of a Solr server or + * SolrCloud; Based on this information this class can return the schema and a corresponding + * {@link DocumentLoader}. 
+ */ +public class SolrLocator { + + private Config config; + private MorphlineContext context; + private String collectionName; + private String zkHost; + private String solrUrl; + private String solrHomeDir; + private int batchSize = 1000; + + private static final String SOLR_HOME_PROPERTY_NAME = "solr.solr.home"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrLocator.class); + + protected SolrLocator(MorphlineContext context) { + Preconditions.checkNotNull(context); + this.context = context; + } + + public SolrLocator(Config config, MorphlineContext context) { + this(context); + this.config = config; + Configs configs = new Configs(); + collectionName = configs.getString(config, "collection", null); + zkHost = configs.getString(config, "zkHost", null); + solrHomeDir = configs.getString(config, "solrHomeDir", null); + solrUrl = configs.getString(config, "solrUrl", null); + batchSize = configs.getInt(config, "batchSize", batchSize); + LOG.trace("Constructed solrLocator: {}", this); + configs.validateArguments(config); + } + + public DocumentLoader getLoader() { + if (context instanceof SolrMorphlineContext) { + DocumentLoader loader = ((SolrMorphlineContext)context).getDocumentLoader(); + if (loader != null) { + return loader; + } + } + + if (zkHost != null && zkHost.length() > 0) { + if (collectionName == null || collectionName.length() == 0) { + throw new MorphlineCompilationException("Parameter 'zkHost' requires that you also pass parameter 'collection'", config); + } + try { + CloudSolrServer cloudSolrServer = new CloudSolrServer(zkHost); + cloudSolrServer.setDefaultCollection(collectionName); + cloudSolrServer.connect(); + return new SolrServerDocumentLoader(cloudSolrServer, batchSize); + } catch (MalformedURLException e) { + throw new MorphlineRuntimeException(e); + } + } else { + if (solrUrl == null || solrUrl.length() == 0) { + throw new MorphlineCompilationException("Missing parameter 'solrUrl'", config); + } + int solrServerNumThreads 
= 2; + int solrServerQueueLength = solrServerNumThreads; + SolrServer server = new SafeConcurrentUpdateSolrServer(solrUrl, solrServerQueueLength, solrServerNumThreads); + // SolrServer server = new HttpSolrServer(solrServerUrl); + // SolrServer server = new ConcurrentUpdateSolrServer(solrServerUrl, solrServerQueueLength, solrServerNumThreads); + // server.setParser(new XMLResponseParser()); // binary parser is used by default + return new SolrServerDocumentLoader(server, batchSize); + } + } + + public IndexSchema getIndexSchema() { + if (context instanceof SolrMorphlineContext) { + IndexSchema schema = ((SolrMorphlineContext)context).getIndexSchema(); + if (schema != null) { + validateSchema(schema); + return schema; + } + } + + // If solrHomeDir isn't defined and zkHost and collectionName are defined + // then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir + String oldSolrHomeDir = null; + String mySolrHomeDir = solrHomeDir; + if (solrHomeDir == null || solrHomeDir.length() == 0) { + if (zkHost == null || zkHost.length() == 0) { + // TODO: implement download from solrUrl if specified + throw new MorphlineCompilationException( + "Downloading a Solr schema requires either parameter 'solrHomeDir' or parameters 'zkHost' and 'collection'", + config); + } + if (collectionName == null || collectionName.length() == 0) { + throw new MorphlineCompilationException( + "Parameter 'zkHost' requires that you also pass parameter 'collection'", config); + } + ZooKeeperDownloader zki = new ZooKeeperDownloader(); + SolrZkClient zkClient = zki.getZkClient(zkHost); + try { + String configName = zki.readConfigName(zkClient, collectionName); + File downloadedSolrHomeDir = zki.downloadConfigDir(zkClient, configName); + mySolrHomeDir = downloadedSolrHomeDir.getAbsolutePath(); + } catch (KeeperException e) { + throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); + } catch (InterruptedException e) { + throw new 
MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); + } catch (IOException e) { + throw new MorphlineCompilationException("Cannot download schema.xml from ZooKeeper", config, e); + } finally { + zkClient.close(); + } + } + + oldSolrHomeDir = System.setProperty(SOLR_HOME_PROPERTY_NAME, mySolrHomeDir); + try { + SolrConfig solrConfig = new SolrConfig(); // TODO use SolrResourceLoader ala TikaMapper? + // SolrConfig solrConfig = new SolrConfig("solrconfig.xml"); + // SolrConfig solrConfig = new + // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1", + // "solrconfig.xml", null); + // SolrConfig solrConfig = new + // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1/conf/solrconfig.xml"); + SolrResourceLoader loader = solrConfig.getResourceLoader(); + + InputSource is = new InputSource(loader.openSchema("schema.xml")); + is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml")); + + IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is); + validateSchema(schema); + return schema; + } catch (ParserConfigurationException e) { + throw new MorphlineRuntimeException(e); + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } catch (SAXException e) { + throw new MorphlineRuntimeException(e); + } finally { // restore old global state + if (solrHomeDir != null) { + if (oldSolrHomeDir == null) { + System.clearProperty(SOLR_HOME_PROPERTY_NAME); + } else { + System.setProperty(SOLR_HOME_PROPERTY_NAME, oldSolrHomeDir); + } + } + } + } + + private void validateSchema(IndexSchema schema) { + if (schema.getUniqueKeyField() == null) { + throw new MorphlineCompilationException("Solr schema.xml is missing unique key field", config); + } + if (!schema.getUniqueKeyField().isRequired()) { + throw new MorphlineCompilationException("Solr schema.xml must contain a required unique key field", config); + } + } + + @Override + public String toString() { + return 
toConfig(null).root().render(ConfigRenderOptions.concise()); + } + + public Config toConfig(String key) { + String json = ""; + if (key != null) { + json = toJson(key) + " : "; + } + json += + "{" + + " collection : " + toJson(collectionName) + ", " + + " zkHost : " + toJson(zkHost) + ", " + + " solrUrl : " + toJson(solrUrl) + ", " + + " solrHomeDir : " + toJson(solrHomeDir) + ", " + + " batchSize : " + toJson(batchSize) + " " + + "}"; + return ConfigFactory.parseString(json); + } + + private String toJson(Object key) { + String str = key == null ? "" : key.toString(); + str = ConfigUtil.quoteString(str); + return str; + } + + public String getCollectionName() { + return this.collectionName; + } + + public void setCollectionName(String collectionName) { + this.collectionName = collectionName; + } + + public String getZkHost() { + return this.zkHost; + } + + public void setZkHost(String zkHost) { + this.zkHost = zkHost; + } + + public String getSolrHomeDir() { + return this.solrHomeDir; + } + + public void setSolrHomeDir(String solrHomeDir) { + this.solrHomeDir = solrHomeDir; + } + + public String getServerUrl() { + return this.solrUrl; + } + + public void setServerUrl(String solrUrl) { + this.solrUrl = solrUrl; + } + + public int getBatchSize() { + return this.batchSize; + } + + public void setBatchSize(int batchSize) { + this.batchSize = batchSize; + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java new file mode 100644 index 00000000000..56d6e39227c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrMorphlineContext.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.morphlines.solr;
+
+import org.apache.solr.schema.IndexSchema;
+
+import com.cloudera.cdk.morphline.api.MorphlineContext;
+
+/**
+ * A context that is specific to Solr: in addition to what
+ * {@link MorphlineContext} provides, it carries a {@link DocumentLoader} and
+ * an {@link IndexSchema}. Both are assigned by the nested {@link Builder};
+ * this class itself only exposes getters for them.
+ */
+public class SolrMorphlineContext extends MorphlineContext {
+
+  // Both fields are written by Builder#build() and stay null unless the
+  // corresponding Builder setter was called.
+  private DocumentLoader loader;
+  private IndexSchema schema;
+
+  /** For public access use {@link Builder#build()} instead */
+  protected SolrMorphlineContext() {}
+
+  /** Returns the loader given to the Builder, or null if none was set. */
+  public DocumentLoader getDocumentLoader() {
+    return loader;
+  }
+
+  /** Returns the schema given to the Builder, or null if none was set. */
+  public IndexSchema getIndexSchema() {
+    return schema;
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * Helper to construct a {@link SolrMorphlineContext} instance.
+   */
+  public static class Builder extends MorphlineContext.Builder {
+
+    private DocumentLoader loader;
+    private IndexSchema schema;
+
+    public Builder() {}
+
+    /** Remembers the loader to install on the built context; returns this for chaining. */
+    public Builder setDocumentLoader(DocumentLoader loader) {
+      this.loader = loader;
+      return this;
+    }
+
+    /** Remembers the schema to install on the built context; returns this for chaining. */
+    public Builder setIndexSchema(IndexSchema schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /**
+     * Copies the remembered loader and schema onto the context under
+     * construction and then delegates to {@code super.build()}.
+     */
+    @Override
+    public SolrMorphlineContext build() {
+      // NOTE(review): "context" is not declared in this class; presumably it is
+      // the instance produced by create() and held by MorphlineContext.Builder
+      // -- confirm against the parent class.
+      ((SolrMorphlineContext)context).loader = loader;
+      ((SolrMorphlineContext)context).schema = schema;
+      return (SolrMorphlineContext) super.build();
+    }
+
+    /** Creates the Solr-specific context instance that build() populates. */
+    @Override
+    protected SolrMorphlineContext create() {
+      return new SolrMorphlineContext();
+    }
+
+  }
+
+}
diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java
new file mode 100644
index 00000000000..d343230fcba
--- /dev/null
+++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrServerDocumentLoader.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A vehicle to load a list of Solr documents into a local or remote {@link SolrServer}. + */ +public class SolrServerDocumentLoader implements DocumentLoader { + + private final SolrServer server; // proxy to local or remote solr server + private long numLoadedDocs = 0; // number of documents loaded in the current transaction + private final int batchSize; + private final List batch = new ArrayList(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SolrServerDocumentLoader.class); + + public SolrServerDocumentLoader(SolrServer server, int batchSize) { + if (server == null) { + throw new IllegalArgumentException("solr server must not be null"); + } + this.server = server; + if (batchSize <= 0) { + throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize); + } + this.batchSize = batchSize; + } + + @Override + public void beginTransaction() { + LOGGER.trace("beginTransaction"); + batch.clear(); + numLoadedDocs = 0; + if (server instanceof SafeConcurrentUpdateSolrServer) { + ((SafeConcurrentUpdateSolrServer) server).clearException(); + } + } + + @Override + public void load(SolrInputDocument doc) throws IOException, SolrServerException { + LOGGER.trace("load doc: {}", doc); + batch.add(doc); + if (batch.size() >= batchSize) { + loadBatch(); + } + } + + @Override + public void commitTransaction() throws 
SolrServerException, IOException { + LOGGER.trace("commitTransaction"); + if (batch.size() > 0) { + loadBatch(); + } + if (numLoadedDocs > 0) { + if (server instanceof ConcurrentUpdateSolrServer) { + ((ConcurrentUpdateSolrServer) server).blockUntilFinished(); + } + } + } + + private void loadBatch() throws SolrServerException, IOException { + numLoadedDocs += batch.size(); + try { + UpdateResponse rsp = server.add(batch); + } finally { + batch.clear(); + } + } + + @Override + public UpdateResponse rollbackTransaction() throws SolrServerException, IOException { + LOGGER.trace("rollback"); + if (!(server instanceof CloudSolrServer)) { + return server.rollback(); + } else { + return new UpdateResponse(); + } + } + + @Override + public void shutdown() { + LOGGER.trace("shutdown"); + server.shutdown(); + } + + @Override + public SolrPingResponse ping() throws SolrServerException, IOException { + LOGGER.trace("ping"); + return server.ping(); + } + + public SolrServer getSolrServer() { + return server; + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java new file mode 100644 index 00000000000..58c1bb5536c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/TokenizeTextBuilder.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.io.Reader; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.CommandBuilder; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.MorphlineRuntimeException; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.AbstractCommand; +import com.google.common.base.Preconditions; +import com.typesafe.config.Config; + +/** + * A command that uses the embedded Solr/Lucene Analyzer library to generate tokens from a text + * string, without sending data to a Solr server. 
+ */ +public final class TokenizeTextBuilder implements CommandBuilder { + + @Override + public Collection getNames() { + return Collections.singletonList("tokenizeText"); + } + + @Override + public Command build(Config config, Command parent, Command child, MorphlineContext context) { + return new TokenizeText(config, parent, child, context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class TokenizeText extends AbstractCommand { + + private final String inputFieldName; + private final String outputFieldName; + private final Analyzer analyzer; + private final CharTermAttribute token; // cached + private final ReusableStringReader reader = new ReusableStringReader(); // cached + + public TokenizeText(Config config, Command parent, Command child, MorphlineContext context) { + super(config, parent, child, context); + this.inputFieldName = getConfigs().getString(config, "inputField"); + this.outputFieldName = getConfigs().getString(config, "outputField"); + String solrFieldType = getConfigs().getString(config, "solrFieldType"); + Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); + SolrLocator locator = new SolrLocator(solrLocatorConfig, context); + LOG.debug("solrLocator: {}", locator); + IndexSchema schema = locator.getIndexSchema(); + FieldType fieldType = schema.getFieldTypeByName(solrFieldType); + if (fieldType == null) { + throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config); + } + this.analyzer = fieldType.getAnalyzer(); + Preconditions.checkNotNull(analyzer); + try { // register CharTermAttribute for later (implicit) reuse + this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class); + } catch (IOException e) { + throw new MorphlineCompilationException("Cannot create token 
stream", config, e); + } + Preconditions.checkNotNull(token); + validateArguments(); + } + + @Override + protected boolean doProcess(Record record) { + try { + List outputValues = record.get(outputFieldName); + for (Object value : record.get(inputFieldName)) { + reader.setValue(value.toString()); + TokenStream tokenStream = analyzer.tokenStream("content", reader); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + if (token.length() > 0) { // incrementToken() updates the token! + String tokenStr = new String(token.buffer(), 0, token.length()); + outputValues.add(tokenStr); + } + } + tokenStream.end(); + tokenStream.close(); + } + } catch (IOException e) { + throw new MorphlineRuntimeException(e); + } + + // pass record to next command in chain: + return super.doProcess(record); + } + + } + + + // Copied from org.apache.lucene.document.Field.java from lucene-4.3.0 + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + private static final class ReusableStringReader extends Reader { + private int pos = 0, size = 0; + private String s = null; + + void setValue(String s) { + this.s = s; + this.size = s.length(); + this.pos = 0; + } + + @Override + public int read() { + if (pos < size) { + return s.charAt(pos++); + } else { + s = null; + return -1; + } + } + + @Override + public int read(char[] c, int off, int len) { + if (pos < size) { + len = Math.min(len, size-pos); + s.getChars(pos, pos+len, c, off); + pos += len; + return len; + } else { + s = null; + return -1; + } + } + + @Override + public void close() { + pos = size; // this prevents NPE when reading after close! + s = null; + } + } +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java new file mode 100644 index 00000000000..68cb6270139 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/ZooKeeperDownloader.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.cloud.Aliases; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.StrUtils; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Files; + +/** + * Downloads SolrCloud information from ZooKeeper. + */ +final class ZooKeeperDownloader { + + private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperDownloader.class); + + public SolrZkClient getZkClient(String zkHost) { + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + + SolrZkClient zkClient; + try { + zkClient = new SolrZkClient(zkHost, 30000); + } catch (Exception e) { + throw new IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e); + } + return zkClient; + } + + /** + * Returns config value given collection name + * Borrowed heavily from Solr's ZKController. 
+ */ + public String readConfigName(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + String configName = null; + + // first check for alias + byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true); + Aliases aliases = ClusterState.load(aliasData); + String alias = aliases.getCollectionAlias(collection); + if (alias != null) { + List aliasList = StrUtils.splitSmart(alias, ",", true); + if (aliasList.size() > 1) { + throw new IllegalArgumentException("collection cannot be an alias that maps to multiple collections"); + } + collection = aliasList.get(0); + } + + String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; + if (LOG.isInfoEnabled()) { + LOG.info("Load collection config from:" + path); + } + byte[] data = zkClient.getData(path, null, null, true); + + if(data != null) { + ZkNodeProps props = ZkNodeProps.load(data); + configName = props.getStr(ZkController.CONFIGNAME_PROP); + } + + if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) { + LOG.error("Specified config does not exist in ZooKeeper:" + configName); + throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:" + + configName); + } + + return configName; + } + + /** + * Download and return the config directory from ZK + */ + public File downloadConfigDir(SolrZkClient zkClient, String configName) + throws IOException, InterruptedException, KeeperException { + File dir = Files.createTempDir(); + dir.deleteOnExit(); + ZkController.downloadConfigDir(zkClient, configName, dir); + File confDir = new File(dir, "conf"); + if (!confDir.isDirectory()) { + // create a temporary directory with "conf" subdir and mv the config in there. This is + // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g. 
+ // conf/solrconfig.xml which is necessary for proper solr operation. This should work + // even if solrctl changes. + confDir = new File(Files.createTempDir().getAbsolutePath(), "conf"); + confDir.getParentFile().deleteOnExit(); + Files.move(dir, confDir); + dir = confDir.getParentFile(); + } + return dir; + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html new file mode 100644 index 00000000000..ecec1bdf4d8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/org/apache/solr/morphlines/solr/package.html @@ -0,0 +1,22 @@ + + + + +Morphlines Solr related code. + + diff --git a/solr/contrib/solr-morphlines-core/src/java/overview.html b/solr/contrib/solr-morphlines-core/src/java/overview.html new file mode 100644 index 00000000000..7f8ad137a34 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr Core Morphline Commands + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/README b/solr/contrib/solr-morphlines-core/src/test-files/README new file mode 100644 index 00000000000..10f878acccb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/README @@ -0,0 +1,21 @@ + + +This directory is where any non-transient, non-java files needed +for the execution of tests should live. + +It is used as the CWD when running JUnit tests. diff --git a/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv b/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv new file mode 100644 index 00000000000..817e8b769cf --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/books_numeric_ids.csv @@ -0,0 +1,11 @@ +id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s +0553573403,book,A Game of Thrones,7.99,true,George R.R. 
Martin,"A Song of Ice and Fire",1,fantasy +0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy +0553573429,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy +0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi +0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy +0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi +0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy +0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy +0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy +0805080499,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy diff --git a/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html new file mode 100644 index 00000000000..5732f6214bc --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.html @@ -0,0 +1,49 @@ + + + Welcome to Solr + + +

+ Here is some text +

+
Here is some text in a div
+
This has a link.
+News + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt new file mode 100644 index 00000000000..0c9928b9e26 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/exampledocs/example.txt @@ -0,0 +1,3 @@ +Example text document + +This is a simple example for a plain text document, indexed to Solr \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README new file mode 100644 index 00000000000..b7ca5b834f4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/README @@ -0,0 +1,18 @@ + + +Items under this directory are used by TestConfig.testLibs() diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a1/empty-file-a1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/a/a2/empty-file-a2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b1/empty-file-b1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt 
b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/b/b2/empty-file-b2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c1/empty-file-c1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/c/c2/empty-file-c2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d1/empty-file-d1.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/lib-dirs/d/d2/empty-file-d2.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties b/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties new file mode 100644 index 00000000000..fb0577130bb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/log4j.properties @@ -0,0 +1,12 @@ +# Logging level +log4j.rootLogger=INFO, CONSOLE + 
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Target=System.err +log4j.appender.CONSOLE.layout=org.apache.solr.util.SolrLogLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SSS}; %C; %m\n + +log4j.logger.org.apache.zookeeper=WARN +log4j.logger.org.apache.hadoop=WARN +#log4j.logger.org.apache.solr=WARN +log4j.logger.org.apache.solr.hadoop=INFO \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf b/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf new file mode 100755 index 00000000000..33b819f0649 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/mailing_lists.pdf @@ -0,0 +1,382 @@ +%PDF-1.3 +%ª«¬­ +4 0 obj +<< /Type /Info +/Producer (FOP 0.20.5) >> +endobj +5 0 obj +<< /Length 425 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gb!$BYuAO_'ZTnF'lQbNnGsdiUK'C#3dAWc3lI>k\P#:a@Qja<+itJa;R]7&ni\$9pOi?T._;3m?jT+q7>,P^70oB=!nr]%k%\U^KVqaF4*Z`$VJ7Gs`T5OO`(tY]Q1`-5*m;!--h%?*_0SbIU\BV=OFg<#%YcH_YI$(sDCIJts'M2*drjRrJE!OM7HP!^-&EW>B\:RYFnaY.m[$s5f"XG0>^fduHe6/++D0fY3@AWR@HYabmQ5jDQ.c0>I.uQX&(lA@VLm_s_9XnBh7%"*/%^]AO3eTI!BTo'pF?%''A*PDU*NW%d`2@p'@:D@U??4PP08m[K4N,8,(e`N+\7n+a>ac%q#,D8DRQ*3l]MS>'gn3lWNGmRAtQ7n]eDnLPrD!?DEdB/hNarb_7$B7U-H7!['nXLkV_no5AHq`>6~> +endstream +endobj +6 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 5 0 R +/Annots 7 0 R +>> +endobj +7 0 obj +[ +8 0 R +10 0 R +12 0 R +] +endobj +8 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 559.666 137.324 547.666 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 9 0 R +/H /I +>> +endobj +10 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 541.466 164.648 529.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 11 0 R +/H /I +>> +endobj +12 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 523.266 154.016 511.266 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 13 0 R +/H /I +>> +endobj +14 0 obj +<< /Length 2197 /Filter 
[ /ASCII85Decode /FlateDecode ] + >> +stream +Gb"/)9lo&I&A@C2n5a2!7YkueV^?,ABrC@*[F.^sK-J\u-^*\VZ9A3]?'#&sU^3,]d[;/F9HjMs^A"j:!rHNC?7rs!0)f1q`$?\lOaRt/g/f.>-Am[t'`RUrGL7Uk8K90.i-up;qeIYfjWZ2&ki:[3`TuXFj]`a&Hbo8r&P(RZ+M_>&eY.T4jXOI%UHbq1GnF>g$KgW%R24nBkc\[qA$(koU$isG(W7`PE,nMam;U4(ZC8,Ca!_P2VYf>\V0gK0g;-.E[Y(&s=+&g6ms""'Ip>0b/D!>a&PX9eo_tuueR:b=r@6Q5LM],XbK;&L$0WubNX9c"=FM$543G_>rAQ_%2/dW<)/"U1&]l:AZ&\Mif8sF`r5>b<$lqK"2t]maZ*oDb!^$Zn6OC'%XkI];&*rkLP1BMGI@$,0fK(=gC-3q7n7d4EQ4DepBc'^Q^A%e?19a(`S*FHTN*RNjP&P%2`6%jpOU\DBUN)cnMYa3PQ!sYETiGJi'q>>m*e;[,.1l\rZo3K;>$K"a1:s3pU>o+:'7fND!+6GV@2G;qf`\`=J#WkOjSke<1f>VfbcUtXM"1jGN:@Ptec8Mc-hmS5S>q/nAY%[4%7BCI![NA:We(41]ld_`pU80;+e`1DbG.RQ:'#GQJAL2![aIWY'A*Y_>mF7>2S0IWM%nLg3%%;7r5=;3!7]05r?Ft-6I]9n9C\fUUF6R\9bPEVSutd9LFTpaoaP7Iuus-S#S.3;sVu-*T/:&2Ld]&g0oHoo`TmR'b]ps6hq9s&f+6_5c(k"m96-f:YA!:)K:q+(Hl=t`:+"<lQm6B=K&/r/Ep6Y]EG.T/34(fT0=6_m5PA-7PVo:"r)W'.mX>1A8Yg9kfa?"Qp+ta7Hb$FM`*OP^>3Sg)P[?jIXd]i]"h)Tdjnm[6@=kmEBkP1/K[bg`"7U:BWk^=!+3\ANTnN75*Rh_<-UA*!&rr#KW/7EXkeJU9GF5RA,#kqJ5aC9Ra5,PsiI`uF23/B"nkPHe2Q;B@pBXGM-i;<'oOM,dc3'qL)Ne,OV2.*f^Bt;0P#roPn?h]@-63,-9lQSF!dic13Ag\_]m=7Llb\*&C+>\+o6)Y,C._?+X1Qok%j>f[#T!,CD2T4cL'.Nb_Vit&M]!j7j6LHB.g9AQre&be$gJhbAg68kDJf@XZ7'2791RD*qAP]u")(lEjX)\-#O$aK(E]jq*3XbL:3q:o&9gcZLl?:E-l'-dHf;;_hhH3m/Q3]9jJRn>Z8]1Gt6PAVJ[r2gsg=4$!6I$RQ@Y6;H(U>,LWdW>Z5iTYZ'tAcSfoN,U=/fIoA::l8X^fXIa4m3-]9$Zc\E0H^!pmfeMjW3#p1J)pbH^VZML"NZ$U,Yg;f[AVrZRhlRCC[)D*>K0IRWR98A=<>dPSd)@Ec)OXGjK01hM%!FhVR[I<5Va3V,I"YuQZb-,XEM!Gk_-r<9T0W#M!!;RX!]MtBdJ0ah'FCoNF1r"gmU>Rb4aE:Z'I)d-f_1:B0gfmnM?K9ljY>R%*Fc9oYiohHndi(!dK+]ElID:'g:PKq6fKKHdO>bmG-2]ZmVcqs+ef-EWR(1Da)F&CoL[['3)UZ^!fo+Ua2NSC7m5oIXlLoF)+cWUr/MaMP@shSN$gD*jB=:/ru]MF>3-m'j6_-'>(Uq'PN4Fl*XC8ABmg\b`kmI@<0Sh)bkNopK]E6S7,V*o!<)infW?).%mtC2S8!kqh$BpiWu=4)>.Wm+Mt.YPC"ZlO^Ge*Y5)8QlX2 +endstream +endobj +15 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 14 0 R +/Annots 16 0 R +>> +endobj +16 0 obj +[ +17 0 R +18 0 R +19 0 R +20 0 R +21 0 R +22 0 R 
+23 0 R +24 0 R +25 0 R +26 0 R +27 0 R +28 0 R +29 0 R +] +endobj +17 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 232.344 608.466 372.012 596.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +18 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 591.266 189.336 579.266 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +19 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 578.066 215.988 566.066 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-user-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +20 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 564.866 197.316 552.866 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-user/) +/S /URI >> +/H /I +>> +endobj +21 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 453.924 564.866 475.26 552.866 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SolrResources) +/S /URI >> +/H /I +>> +endobj +22 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 259.668 441.722 396.672 429.722 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +23 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 424.522 189.336 412.522 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +24 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 411.322 215.988 399.322 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-dev-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +25 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 398.122 197.316 386.122 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://mail-archives.apache.org/mod_mbox/lucene-solr-dev/) +/S /URI >> +/H /I +>> +endobj +26 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 453.924 
398.122 475.26 386.122 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://wiki.apache.org/solr/SolrResources) +/S /URI >> +/H /I +>> +endobj +27 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 294.624 296.178 403.284 284.178 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (version_control.html) +/S /URI >> +/H /I +>> +endobj +28 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 265.778 189.336 253.778 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-commits-subscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +29 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 108.0 252.578 215.988 240.578 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (mailto:solr-commits-unsubscribe@lucene.apache.org) +/S /URI >> +/H /I +>> +endobj +31 0 obj +<< + /Title (\376\377\0\61\0\40\0\125\0\163\0\145\0\162\0\163) + /Parent 30 0 R + /Next 32 0 R + /A 9 0 R +>> endobj +32 0 obj +<< + /Title (\376\377\0\62\0\40\0\104\0\145\0\166\0\145\0\154\0\157\0\160\0\145\0\162\0\163) + /Parent 30 0 R + /Prev 31 0 R + /Next 33 0 R + /A 11 0 R +>> endobj +33 0 obj +<< + /Title (\376\377\0\63\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\163) + /Parent 30 0 R + /Prev 32 0 R + /A 13 0 R +>> endobj +34 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +35 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +36 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +37 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +38 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 2 +/Kids [6 0 R 15 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 30 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F3 34 0 R /F5 35 0 R 
/F1 36 0 R /F2 37 0 R /F7 38 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 492.256 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 325.512 null] +>> +endobj +30 0 obj +<< + /First 31 0 R + /Last 33 0 R +>> endobj +xref +0 39 +0000000000 65535 f +0000007198 00000 n +0000007263 00000 n +0000007355 00000 n +0000000015 00000 n +0000000071 00000 n +0000000587 00000 n +0000000707 00000 n +0000000746 00000 n +0000007478 00000 n +0000000881 00000 n +0000007541 00000 n +0000001018 00000 n +0000007607 00000 n +0000001155 00000 n +0000003445 00000 n +0000003568 00000 n +0000003679 00000 n +0000003867 00000 n +0000004063 00000 n +0000004261 00000 n +0000004471 00000 n +0000004665 00000 n +0000004852 00000 n +0000005047 00000 n +0000005244 00000 n +0000005453 00000 n +0000005647 00000 n +0000005821 00000 n +0000006020 00000 n +0000007673 00000 n +0000006221 00000 n +0000006342 00000 n +0000006508 00000 n +0000006642 00000 n +0000006755 00000 n +0000006865 00000 n +0000006973 00000 n +0000007089 00000 n +trailer +<< +/Size 39 +/Root 2 0 R +/Info 4 0 R +>> +startxref +7724 +%%EOF diff --git a/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt new file mode 100644 index 00000000000..6242cff237b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/README.txt @@ -0,0 +1 @@ +This is around for back compat testing purposes and should be able to be removed in Solr 5.0 \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml new file mode 100644 index 00000000000..75da88a52f1 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/old-solr-example/solr.xml @@ -0,0 +1,53 
@@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js new file mode 100644 index 00000000000..1b3c9fc2d6e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/addfields.updateprocessor.js @@ -0,0 +1,26 @@ +function processAdd(cmd) { + // Integer.valueOf is needed here to get a tru java object, because + // all javascript numbers are floating point (ie: java.lang.Double) + cmd.getSolrInputDocument().addField("script_added_i", + java.lang.Integer.valueOf(42)); + cmd.getSolrInputDocument().addField("script_added_d", 42.3); + +} + +// // // + +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt new file mode 100644 index 00000000000..6d276c33a16 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt @@ -0,0 +1,5 @@ +# simple AnalyzingInfix suggest phrase dictionary for testing +Japanese Autocomplete and Japanese Highlighter broken +Add Japanese Kanji number normalization to Kuromoji +Add decompose compound Japanese Katakana token capability to Kuromoji +This is just another entry! 
\ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml new file mode 100644 index 00000000000..d7aeeeb2331 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-currency.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml new file mode 100644 index 00000000000..af5d8fbb155 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-mp-solrconfig.xml @@ -0,0 +1,34 @@ + + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 8 + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml new file mode 100644 index 00000000000..16796361c66 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-analyzer-class-and-nested.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml new file mode 100644 index 00000000000..3f8e224ce1b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-analysis-parameters.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml new file mode 100644 index 00000000000..3575c438c72 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-bogus-field-parameters.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml new file mode 100644 index 00000000000..9a704fdd731 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-codec-global-vs-ft-mismatch.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + pulsing1text + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml new file mode 100644 index 00000000000..a71b361c956 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-dynamic-multivalued.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml new file mode 100644 index 00000000000..6339ae25eab --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-code-in-xml.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml new file mode 100644 index 00000000000..1f92977760e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-bogus-default-code.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml new file mode 100644 index 00000000000..a1b788e628e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-multivalued.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml new file mode 100644 index 00000000000..bd23933b270 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-ft-oer-norates.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml new file mode 100644 index 00000000000..84bfaea141d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-currency-multivalued.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml new file mode 100644 index 00000000000..460fbda8ba2 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-dynamicField.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml new file mode 100644 index 00000000000..4272362a3f4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-field.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml new file mode 100644 index 00000000000..34ef44bcc73 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dup-fieldType.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml new file mode 100644 index 00000000000..0e3595d75cb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-default-val.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml new 
file mode 100644 index 00000000000..c372afd44a4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-dynamicfield-required.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml new file mode 100644 index 00000000000..e7874c88d25 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-external-filefield.xml @@ -0,0 +1,27 @@ + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml new file mode 100644 index 00000000000..5b32376751c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-dest-should-fail-test.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml new file mode 100644 index 00000000000..ddc9f4dc685 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-misplaced-asterisk-copyfield-source-should-fail-test.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml new file mode 100644 index 00000000000..fb3ddbe5c41 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-dest-should-fail-test.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml new file mode 100644 index 00000000000..b3ca6ae3096 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-multiple-asterisk-copyfield-source-should-fail-test.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml new file mode 100644 index 00000000000..86e80a4555e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-non-glob-copyfield-source-matching-nothing-should-fail-test.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml new file mode 100644 index 00000000000..06a689a8298 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-nontext-analyzer.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml new file mode 100644 index 00000000000..f7c4e9b2d80 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-norms.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml new file mode 100644 index 00000000000..774d58755f4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-pos.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml new file mode 100644 index 00000000000..d153793830a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-not-indexed-but-tf.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml new file mode 100644 index 00000000000..116f116a176 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-omit-tf-but-not-pos.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + id + id + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml new file mode 100644 index 00000000000..a776d105541 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sim-global-vs-ft-mismatch.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + sim1text + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml new file mode 100644 index 00000000000..99028c18a7c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-both-tf.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + 6.0 + 1.5 + 3.3 + 7.7 + 5.0 + 5.0 + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml new file mode 100644 index 00000000000..cf34ec8e21b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-baseline.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + 6.0 + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml new file mode 100644 index 00000000000..61e18ad73c7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-hyperbolic.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + 3.3 + + 5.0 + 5.0 + + + 
+ + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml new file mode 100644 index 00000000000..ef4e8042b3c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-sweetspot-partial-norms.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + 3 + + 0.5 + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml new file mode 100644 index 00000000000..bf1d53212e4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-is-copyfield-dest.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml new file mode 100644 index 00000000000..81ce319eb86 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-multivalued.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + id + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml new file mode 100644 index 00000000000..026b529a942 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-uniquekey-uses-default.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + id + id + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml new file mode 100644 index 00000000000..5f4d69a31a7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-schema-unsupported-docValues.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml new file mode 100644 index 00000000000..fc9e108bee3 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-bogus-scriptengine-name.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + giberish + missleading.extension.updateprocessor.js.txt + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml new file mode 100644 index 00000000000..dbadbb5c2c0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-invalid-scriptfile.xml @@ -0,0 +1,33 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + javascript + + currency.xml + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml new file mode 100644 index 00000000000..a15c0ac1d6e --- /dev/null +++ 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-managed-schema-named-schema.xml.xml @@ -0,0 +1,30 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + false + schema.xml + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml new file mode 100644 index 00000000000..4dee70ce08f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-missing-scriptfile.xml @@ -0,0 +1,31 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + a-file-name-that-does-not-exist.js + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml new file mode 100644 index 00000000000..f13acb3f6b0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-cfs.xml @@ -0,0 +1,30 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + true + false + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml new file mode 100644 index 00000000000..4da2a002f40 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-dirfactory.xml @@ -0,0 +1,34 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml new file mode 100644 index 00000000000..00dd08c36fe --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-multiple-indexconfigs.xml @@ -0,0 +1,35 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + true + false + + + + ${useCompoundFile:false} + true + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml new file mode 100644 index 00000000000..9fe2e89e037 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-schema-mutable-but-not-managed.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + false + schema.xml + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml new file mode 100644 index 00000000000..d07cb0d1c11 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-unexpected-schema-attribute.xml @@ -0,0 +1,32 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + false + managed-schema + bogusValue + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml new file mode 100644 index 00000000000..9c9c96402ec --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad-solrconfig-warmer-no-reopen.xml @@ -0,0 +1,27 @@ + + + + 
+ + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + false + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml new file mode 100644 index 00000000000..ed07d9afdea --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/bad_solrconfig.xml @@ -0,0 +1,27 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + ${unset.sys.property} + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt new file mode 100644 index 00000000000..f4977b5df72 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/compoundDictionary.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# A set of words for testing the DictionaryCompound factory +soft +ball +team diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js new file mode 100644 index 00000000000..5ec9487c150 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/conditional.updateprocessor.js @@ -0,0 +1,25 @@ +function processAdd(cmd) { + if (req.getParams().getBool("go-for-it",false)) { + cmd.getSolrInputDocument().addField("script_added_s", "i went for it"); + return true; + } + return false; +} + +// // // + +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..6a12b32b2a8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml new file mode 100644 index 00000000000..2c8d203be68 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_UTF8.xml @@ -0,0 +1,1208 @@ + + + + + + + + + + +aA +bB +cC +dD +eE +fF +gG +hH +iI +jJ +kK +lL +mM +nN +oO +pP +qQ +rR +sS +tT +uU +vV +wW +xX +yY +zZ +æÆ +øØ +åÅ + + + +.ae3 +.an3k +.an1s +.be5la +.be1t +.bi4tr +.der3i +.diagno5 +.her3 +.hoved3 +.ne4t5 +.om1 +.ove4 +.po1 +.til3 +.yd5r +ab5le +3abst +a3c +ade5la +5adg +a1e +5afg +5a4f1l +af3r +af4ri +5afs +a4gef +a4gi 
+ag5in +ag5si +3agti +a4gy +a3h +ais5t +a3j +a5ka +a3ke +a5kr +aku5 +a3la +a1le +a1li +al3k +4alkv +a1lo +al5si +a3lu +a1ly +am4pa +3analy +an4k5r +a3nu +3anv +a5o +a5pe +a3pi +a5po +a1ra +ar5af +1arb +a1re +5arg +a1ri +a3ro +a3sa +a3sc +a1si +a3sk +a3so +3a3sp +a3ste +a3sti +a1ta1 +a1te +a1ti +a4t5in +a1to +ato5v +a5tr +a1tu +a5va +a1ve +a5z +1ba +ba4ti +4bd +1be +be1k +be3ro +be5ru +be1s4 +be1tr +1bi +bi5sk +b1j +4b1n +1bo +bo4gr +bo3ra +bo5re +1br4 +4bs +bs5k +b3so +b1st +b5t +3bu +bu4s5tr +b5w +1by +by5s +4c1c +1ce +ce5ro +3ch +4ch. +ci4o +ck3 +5cy +3da +4d3af +d5anta +da4s +d1b +d1d4 +1de +de5d +4de4lem +der5eri +de4rig +de5sk +d1f +d1g +d3h +1di +di1e +di5l +d3j +d1k +d1l +d1m +4d1n +3do +4dop +d5ov +d1p +4drett +5d4reve +3drif +3driv +d5ros +d5ru +ds5an +ds5in +d1ski +d4sm +d4su +dsu5l +ds5vi +d3ta +d1te +dt5o +d5tr +dt5u +1du +dub5 +d1v +3dy +e5ad +e3af +e5ag +e3ak +e1al +ea4la +e3an +e5ap +e3at +e3bl +ebs3 +e1ci +ed5ar +edde4 +eddel5 +e4do +ed5ra +ed3re +ed3rin +ed4str +e3e +3eff +e3fr +3eft +e3gu +e1h +e3in +ei5s +e3je +e4j5el +e1ka +e3ke +e3kl +4e1ko +e5kr +ek5sa +3eksem +3eksp +e3ku +e1kv +e5ky +e3lad +el3ak +el3ar +e1las +e3le +e4lek +3elem +e1li +5elim +e3lo +el5sa +e5lu +e3ly +e4mad +em4p5le +em1s +en5ak +e4nan +4enn +e4no +en3so +e5nu +e5ol +e3op +e1or +e3ov +epi3 +e1pr +e3ra +er3af +e4rag +e4rak +e1re +e4ref +er5ege +5erhv +e1ri +e4rib +er1k +ero5d +er5ov +er3s +er5tr +e3rum +er5un +e5ry +e1ta +e1te +etek4s +e1ti +e3tj +e1to +e3tr +e3tu +e1ty +e3um +e3un +3eur +e1va +e3ve +e4v3erf +e1vi +e5x +1fa +fa4ce +fags3 +f1b +f1d +1fe +fej4 +fejl1 +f1f +f1g +f1h +1fi +f1k +3fl +1fo +for1en +fo4ri +f1p +f1s4 +4ft +f3ta +f1te +f1ti +f5to +f5tvi +1fu +f1v +3fy +1ga +g3art +g1b +g1d +1ge +4g5enden +ger3in +ge3s +g3f +g1g +g1h +1gi +gi4b +gi3st +5gj +g3k +g1l +g1m +3go +4g5om +g5ov +g3p +1gr +gs1a +gsde4len +g4se +gsha4 +g5sla +gs3or +gs1p +g5s4tide +g4str +gs1v +g3ta +g1te +g1ti +g5to +g3tr +gt4s +g3ud +gun5 +g3v +1gy +g5yd +4ha. 
+heds3 +he5s +4het +hi4e +hi4n5 +hi3s +ho5ko +ho5ve +4h3t +hun4 +hund3 +hvo4 +i1a +i3b +i4ble +i1c +i3dr +ids5k +i1el +i1en +i3er +i3et. +if3r +i3gu +i3h +i5i +i5j +i1ka +i1ke +ik1l +i5ko +ik3re +ik5ri +iks5t +ik4tu +i3ku +ik3v +i3lag +il3eg +il5ej +il5el +i3li +i4l5id +il3k +i1lo +il5u +i3mu +ind3t +5inf +ings1 +in3s +in4sv +inter1 +i3nu +i3od +i3og +i5ok +i3ol +ion4 +ions1 +i5o5r +i3ot +i5pi +i3pli +i5pr +i3re +i3ri +ir5t +i3sc +i3si +i4sm +is3p +i1ster +i3sti +i5sua +i1ta +i1te +i1ti +i3to +i3tr +it5re. +i1tu +i3ty +i1u +i1va +i1ve +i1vi +j3ag +jde4rer +jds1 +jek4to +4j5en. +j5k +j3le +j3li +jlmeld5 +jlmel4di +j3r +jre5 +ju3s +5kap +k5au +5kav +k5b +kel5s +ke3sk +ke5st +ke4t5a +k3h +ki3e +ki3st +k1k +k5lak +k1le +3klu +k4ny +5kod +1kon +ko3ra +3kort +ko3v +1kra +5kry +ks3an +k1si +ks3k +ks1p +k3ste +k5stu +ks5v +k1t +k4tar +k4terh +kti4e +kt5re +kt5s +3kur +1kus +3kut +k4vo +k4vu +5lab +lad3r +5lagd +la4g3r +5lam +1lat +l1b +ldiagnos5 +l3dr +ld3st +1le. +5led +4lele +le4mo +3len +1ler +1les +4leu +l1f +lfin4 +lfind5 +l1go1 +l3h +li4ga +4l5ins +4l3int +li5o +l3j +l1ke +l1ko +l3ky +l1l +l5mu +lo4du +l3op +4l5or +3lov +4l3p +l4ps +l3r +4ls +lses1 +ls5in +l5sj +l1ta +l4taf +l1te +l4t5erf +l3ti +lt3o +l3tr +l3tu +lu5l +l3ve +l3vi +1ma +m1b +m3d +1me +4m5ej +m3f +m1g +m3h +1mi +mi3k +m5ing +mi4o +mi5sty +m3k +m1l +m1m +mmen5 +m1n +3mo +mo4da +4mop +4m5ov +m1pe +m3pi +m3pl +m1po +m3pr +m1r +mse5s +ms5in +m5sk +ms3p +m3ste +ms5v +m3ta +m3te +m3ti +m3tr +m1ud +1mul +mu1li +3my +3na +4nak +1nal +n1b +n1c +4nd +n3dr +nd5si +nd5sk +nd5sp +1ne +ne5a +ne4da +nemen4 +nement5e +neo4 +n3erk +n5erl +ne5sl +ne5st +n1f +n4go +4n1h +1ni +4nim +ni5o +ni3st +n1ke +n1ko +n3kr +n3ku +n5kv +4n1l +n1m +n1n +1no +n3ord +n5p +n3r +4ns +n3si +n1sku +ns3po +n1sta +n5sti +n1ta +nta4le +n1te +n1ti +ntiali4 +n3to +n1tr +nt4s5t +nt4su +n3tu +n3ty +4n1v +3ny +n3z +o3a +o4as +ob3li +o1c +o4din +od5ri +od5s +od5un +o1e +of5r +o4gek +o4gel +o4g5o +og5re +og5sk +o5h +o5in +oi6s5e +o1j +o3ka +o1ke 
+o3ku +o3la +o3le +o1li +o1lo +o3lu +o5ly +1omr +on3k +ook5 +o3or +o5ov +o3pi +op3l +op3r +op3s +3opta +4or. +or1an +3ordn +ord5s +o3re. +o3reg +o3rek +o3rer +o3re3s +o3ret +o3ri +3orient +or5im +o4r5in +or3k +or5o +or3sl +or3st +o3si +o3so +o3t +o1te +o5un +ov4s +3pa +pa5gh +p5anl +p3d +4pec +3pen +1per +pe1ra +pe5s +pe3u +p3f +4p5h +1pla +p4lan +4ple. +4pler +4ples +p3m +p3n +5pok +4po3re +3pot +4p5p4 +p4ro +1proc +p3sk +p5so +ps4p +p3st +p1t +1pu +pu5b +p5ule +p5v +5py3 +qu4 +4raf +ra5is +4rarb +r1b +r4d5ar +r3dr +rd4s3 +4reks +1rel +re5la +r5enss +5rese +re5spo +4ress +re3st +re5s4u +5rett +r1f +r1gu +r1h +ri1e +ri5la +4rimo +r4ing +ringse4 +ringso4r +4rinp +4rint +r3ka +r1ke +r1ki +rk3so +r3ku +r1l +rmo4 +r5mu +r1n +ro1b +ro3p +r3or +r3p +r1r +rre5s +rro4n5 +r1sa +r1si +r5skr +r4sk5v +rs4n +r3sp +r5stu +r5su +r3sv +r5tal +r1te +r4teli +r1ti +r3to +r4t5or +rt5rat +rt3re +r5tri +r5tro +rt3s +r5ty +r3ud +run4da +5rut +r3va +r1ve +r3vi +ry4s +s3af +1sam +sa4ma +s3ap +s1ar +1sat +4s1b +s1d +sdy4 +1se +s4ed +5s4er +se4se +s1f +4s1g4 +4s3h +si4bl +1sig +s5int +5sis +5sit +5siu +s5ju +4sk. +1skab +1ske +s3kl +sk5s4 +5sky +s1le +s1li +slo3 +5slu +s5ly +s1m +s4my +4snin +s4nit +so5k +5sol +5som. +3somm +s5oms +5somt +3son +4s1op +sp4 +3spec +4sper +3s4pi +s1pl +3sprog. +s5r4 +s1s4 +4st. +5s4tam +1stan +st5as +3stat +1stav +1ste. +1sted +3stel +5stemo +1sten +5step +3ster. +3stes +5stet +5stj +3sto +st5om +1str +s1ud +3sul +s3un +3sur +s3ve +3s4y +1sy1s +5ta. 
+1tag +tands3 +4tanv +4tb +tede4l +teds5 +3teg +5tekn +teo1 +5term +te5ro +4t1f +6t3g +t1h +tialis5t +3tid +ti4en +ti3st +4t3k +4t1l +tli4s5 +t1m +t1n +to5ra +to1re +to1ri +tor4m +4t3p +t4ra +4tres +tro5v +1try +4ts +t3si +ts4pa +ts5pr +t3st +ts5ul +4t1t +t5uds +5tur +t5ve +1typ +u1a +5udl +ud5r +ud3s +3udv +u1e +ue4t5 +uge4ri +ugs3 +u5gu +u3i +u5kl +uk4ta +uk4tr +u1la +u1le +u5ly +u5pe +up5l +u5q +u3ra +u3re +u4r3eg +u1rer +u3ro +us5a +u3si +u5ska +u5so +us5v +u1te +u1ti +u1to +ut5r +ut5s4 +5u5v +va5d +3varm +1ved +ve4l5e +ve4reg +ve3s +5vet +v5h +vi4l3in +1vis +v5j +v5k +vl4 +v3le +v5li +vls1 +1vo +4v5om +v5p +v5re +v3st +v5su +v5t +3vu +y3a +y5dr +y3e +y3ke +y5ki +yk3li +y3ko +yk4s5 +y3kv +y5li +y5lo +y5mu +yns5 +y5o +y1pe +y3pi +y3re +yr3ek +y3ri +y3si +y3ti +y5t3r +y5ve +zi5o + +.sÃ¥3 +.ær5i +.øv3r +a3tø +a5væ +brød3 +5bæ +5drøv +dstÃ¥4 +3dæ +3dø +e3læ +e3lø +e3rø +er5øn +e5tæ +e5tø +e1væ +e3æ +e5Ã¥ +3fæ +3fø +fø4r5en +giø4 +g4sø +g5sÃ¥ +3gæ +3gø1 +3gÃ¥ +i5tæ +i3ø +3kø +3kÃ¥ +lingeniø4 +l3væ +5løs +m5tÃ¥ +1mæ +3mø +3mÃ¥ +n3kæ +n5tæ +3næ +4n5æb +5nø +o5læ +or3ø +o5Ã¥ +5præ +5pæd +pÃ¥3 +r5kæ +r5tæ +r5tø +r3væ +r5æl +4røn +5rør +3rÃ¥d +r5Ã¥r +s4kÃ¥ +3slÃ¥ +s4næ +5stø +1stÃ¥ +1sæ +4s5æn +1sø +s5øk +sÃ¥4r5 +ti4ø +3træk. 
+t4sø +t5sÃ¥ +t3væ +u3læ +3værd +1værk +5vÃ¥ +y5væ +æb3l +æ3c +æ3e +æg5a +æ4gek +æ4g5r +ægs5 +æ5i +æ5kv +ælle4 +æn1dr +æ5o +æ1re +ær4g5r +æ3ri +ær4ma +ær4mo +ær5s +æ5si +æ3so +æ3ste +æ3ve +øde5 +ø3e +ø1je +ø3ke +ø3le +øms5 +øn3st +øn4t3 +ø1re +ø3ri +ørne3 +ør5o +ø1ve +Ã¥1d +Ã¥1e +Ã¥5h +Ã¥3l +Ã¥3re +Ã¥rs5t +Ã¥5sk +Ã¥3t + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt new file mode 100644 index 00000000000..9a14f40c5f9 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/da_compoundDictionary.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A set of words for testing the HyphenationCompound factory, +# in conjunction with the danish hyphenation grammar. 
+læse +hest diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..1befc5443e7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt new file mode 100644 index 00000000000..914161185f7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/frenchArticles.txt @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A set of articles for testing the French Elision filter. +# Requiring a text file is a bit weird here... 
+l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt new file mode 100644 index 00000000000..94e2152160a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/fuzzysuggest.txt @@ -0,0 +1,4 @@ +# simple fuzzy suggest phrase dictionary for testing +change 1.0 +charge 1.0 +chance 1.0 \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff new file mode 100644 index 00000000000..d035ad18001 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.aff @@ -0,0 +1,13 @@ +SET UTF-8 +TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + +SFX A Y 2 +SFX A 0 e n +SFX A 0 e t + +SFX C Y 2 +SFX C 0 d/C c +SFX C 0 c b + +PFX B Y 1 +PFX B 0 s o \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic new file mode 100644 index 00000000000..92c35d2b6ab --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hunspell-test.dic @@ -0,0 +1,6 @@ +5 +lucen/A +lucene +mahout/A +olr/B +ab/C \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd new file mode 100644 index 00000000000..083c2bd8e80 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/hyphenation.dtd @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt new file mode 100644 index 00000000000..6df149de61a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/jasuggest.txt @@ -0,0 +1,5 @@ +# simple auto-suggest phrase dictionary for testing +# note this uses tabs as separator! +åŒ—æµ·é“ 1.0 +今夜 3.0 +話ã—㟠6.0 \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt new file mode 100644 index 00000000000..8dfe80902d2 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +foo +bar \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt new file mode 100644 index 00000000000..646b7ff4ddb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/keep-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +junk +more \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt new file mode 100644 index 00000000000..ede7742581b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/mapping-ISOLatin1Accent.txt @@ -0,0 +1,246 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) + +# example: +# "À" => "A" +# "\u00C0" => "A" +# "\u00C0" => "\u0041" +# "ß" => "ss" +# "\t" => " " +# "\n" => "" + +# À => A +"\u00C0" => "A" + +# à => A +"\u00C1" => "A" + +#  => A +"\u00C2" => "A" + +# à => A +"\u00C3" => "A" + +# Ä => A +"\u00C4" => "A" + +# Ã… => A +"\u00C5" => "A" + +# Æ => AE +"\u00C6" => "AE" + +# Ç => C +"\u00C7" => "C" + +# È => E +"\u00C8" => "E" + +# É => E +"\u00C9" => "E" + +# Ê => E +"\u00CA" => "E" + +# Ë => E +"\u00CB" => "E" + +# ÃŒ => I +"\u00CC" => "I" + +# à => I +"\u00CD" => "I" + +# ÃŽ => I +"\u00CE" => "I" + +# à => I +"\u00CF" => "I" + +# IJ => IJ +"\u0132" => "IJ" + +# à => D +"\u00D0" => "D" + +# Ñ => N +"\u00D1" => "N" + +# Ã’ => O +"\u00D2" => "O" + +# Ó => O +"\u00D3" => "O" + +# Ô => O +"\u00D4" => "O" + +# Õ => O +"\u00D5" => "O" + +# Ö => O +"\u00D6" => "O" + +# Ø => O +"\u00D8" => "O" + +# Å’ => OE +"\u0152" => "OE" + +# Þ +"\u00DE" => "TH" + +# Ù => U +"\u00D9" => "U" + +# Ú => U +"\u00DA" => "U" + +# Û => U +"\u00DB" => "U" + +# Ãœ => U +"\u00DC" => "U" + +# à => Y +"\u00DD" => "Y" + +# Ÿ => Y +"\u0178" => "Y" + +# à => a +"\u00E0" => "a" + +# á => a +"\u00E1" => "a" + +# â => a +"\u00E2" => "a" + +# ã => a +"\u00E3" => "a" + +# ä => a +"\u00E4" => "a" + +# Ã¥ => a +"\u00E5" => "a" + +# æ => ae +"\u00E6" => "ae" + +# ç => c +"\u00E7" => "c" + +# è => e +"\u00E8" => "e" + +# é => e +"\u00E9" => "e" + +# ê => e +"\u00EA" => "e" + +# ë => e +"\u00EB" => "e" + +# ì => i 
+"\u00EC" => "i" + +# í => i +"\u00ED" => "i" + +# î => i +"\u00EE" => "i" + +# ï => i +"\u00EF" => "i" + +# ij => ij +"\u0133" => "ij" + +# ð => d +"\u00F0" => "d" + +# ñ => n +"\u00F1" => "n" + +# ò => o +"\u00F2" => "o" + +# ó => o +"\u00F3" => "o" + +# ô => o +"\u00F4" => "o" + +# õ => o +"\u00F5" => "o" + +# ö => o +"\u00F6" => "o" + +# ø => o +"\u00F8" => "o" + +# Å“ => oe +"\u0153" => "oe" + +# ß => ss +"\u00DF" => "ss" + +# þ => th +"\u00FE" => "th" + +# ù => u +"\u00F9" => "u" + +# ú => u +"\u00FA" => "u" + +# û => u +"\u00FB" => "u" + +# ü => u +"\u00FC" => "u" + +# ý => y +"\u00FD" => "y" + +# ÿ => y +"\u00FF" => "y" + +# ff => ff +"\uFB00" => "ff" + +# ï¬ => fi +"\uFB01" => "fi" + +# fl => fl +"\uFB02" => "fl" + +# ffi => ffi +"\uFB03" => "ffi" + +# ffl => ffl +"\uFB04" => "ffl" + +# ſt => ft +"\uFB05" => "ft" + +# st => st +"\uFB06" => "st" diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js new file mode 100644 index 00000000000..6e8728a0d77 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missing.functions.updateprocessor.js @@ -0,0 +1,3 @@ +function doSomeStuff() { + return "This script doesn't contain any update processor functions"; +} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt new file mode 100644 index 00000000000..984e1d82f10 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/missleading.extension.updateprocessor.js.txt @@ -0,0 +1,23 @@ +function processAdd(cmd) { + // Integer.valueOf is needed here to get a tru java object, because + // all javascript numbers are floating point (ie: 
java.lang.Double) + cmd.getSolrInputDocument().addField("script_added_i", + java.lang.Integer.valueOf(42)); + cmd.getSolrInputDocument().addField("script_added_d", 42.3); + +} +function processDelete() { + // NOOP +} +function processCommit() { + // NOOP +} +function processRollback() { + // NOOP +} +function processMergeIndexes() { + // NOOP +} +function finish() { + // NOOP +} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt new file mode 100644 index 00000000000..a7624f0597d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/old_synonyms.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+a => aa +b => b1 b2 +c => c1,c2 +a\=>a => b\=>b +a\,a => b\,b +foo,bar,baz + +Television,TV,Televisions diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json new file mode 100644 index 00000000000..8fbc217f6e9 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/open-exchange-rates.json @@ -0,0 +1,18 @@ +{ + "disclaimer": "This data is not real, it was synthetically created to match currency.xml. It is modeled after the data format available from openexchangerates.org. See https://openexchangerates.org/documentation for details", + "license": "http://www.apache.org/licenses/LICENSE-2.0", + "timestamp": 1332070464, + + + "IMPORTANT NOTE": "In order for tests to work, this data must be kept in sync with ./currency.xml", + + + "base": "USD", + "rates": { + "USD": 1, + "JPY": 81.29, + "EUR": 2.5, + "GBP": 0.5, + "MXN": 2.0 + } +} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt new file mode 100644 index 00000000000..fd4984d70b8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/phrasesuggest.txt @@ -0,0 +1,8 @@ +# simple auto-suggest phrase dictionary for testing +# note this uses tabs as separator! 
+the first phrase 1.0 +the second phrase 2.0 +testing 1234 3.0 +foo 5.0 +the fifth phrase 2.0 +the final phrase 4.0 diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..ab7e3e2470e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#use a protected word file to avoid stemming two +#unrelated words to the same base word. +#to test, we will use words that would normally obviously be stemmed. 
+cats +ridding +c# +c++ +.net diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt new file mode 100644 index 00000000000..1dc0537c72b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/regex-boost-processor-test.txt @@ -0,0 +1,10 @@ +# Sample config file for RegexBoostProcessor +# This example applies boost on the "url" field to boost or deboost certain urls +# All rules are evaluated, and if several of them match, the boosts are multiplied. +# If for example one rule with boost 2.0 and one rule with boost 0.1 match, the resulting urlboost=0.2 + +https?://[^/]+/old/.* 0.1 #Comments are removed +https?://[^/]+/.*index\([0-9]\).html$ 0.5 + +# Prioritize certain sites over others +https?://www.mydomain.no/.* 1.5 \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml new file mode 100644 index 00000000000..2b59472f5f0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-add-schema-fields-update-processor.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml new file mode 100644 index 00000000000..20b5a3533b9 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-behavior.xml @@ -0,0 +1,121 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml new file mode 100644 index 00000000000..1f9312e61d0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-binaryfield.xml @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml new file mode 100644 index 00000000000..54bdc0566aa --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-bm25.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + 1.2 + 0.76 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml new file mode 100644 index 00000000000..5eaab1f19e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-charfilters.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + content + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml new file mode 100644 index 00000000000..46a1321260c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-class-name-shortening-on-serialization.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml new file mode 100644 index 00000000000..7feb73a3015 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-collate.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml new file mode 100644 index 00000000000..3ab7837284f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-copyfield-test.xmltext + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml new file mode 100644 index 00000000000..c4f7d8331dd --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-dfr.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + I(F) + B + H2 + + + + + + + + I(F) + B + H3 + 900 + + + + + + + + P + L + H2 + 7 + + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml new file mode 100644 index 00000000000..63d87997402 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValues.xml @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml new file mode 100755 index 00000000000..0e3116d0797 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + id + id + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml new file mode 100644 index 00000000000..3e39c2c40ac --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMissing.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml new file mode 100644 index 00000000000..6d58feda4e5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-docValuesMulti.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml new file mode 100644 index 00000000000..60cab4f8601 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-eff.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + id + + + + + + + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xml new file mode 100644 index 00000000000..c2a0e60f3ed --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-folding.xmlcontent + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml new file mode 100644 index 00000000000..3d55b2ac70b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-ib.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + SPL + DF + H2 + + + + + + + + LL + TTF + H3 + 900 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml new file mode 100644 index 00000000000..9f5059f26c1 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-id-and-version-fields-only.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml new file mode 100644 index 00000000000..f39922f7c45 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmdirichlet.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + 1000 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml new file mode 
100644 index 00000000000..49b692e8d90 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-lmjelinekmercer.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + 0.4 + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml new file mode 100644 index 00000000000..3bb2b491b3b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-luceneMatchVersion.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml new file mode 100644 index 00000000000..9e2f9471026 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-minimal.xml @@ -0,0 +1,25 @@ + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml new file mode 100644 index 00000000000..b3869812375 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-not-required-unique-key.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + subject + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml new file mode 100644 index 00000000000..d00545ed102 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-numeric.xml @@ -0,0 
+1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml new file mode 100644 index 00000000000..783ae77c958 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field-unique-key.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + str + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml new file mode 100644 index 00000000000..035f975d6b2 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-one-field-no-dynamic-field.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml new file mode 100644 index 00000000000..f5ed9155e66 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-phrasesuggest.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml new file mode 100644 index 00000000000..e58b2e82eaf --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + text + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml new file mode 100644 index 00000000000..fe123dfa6d0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication1.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml new file mode 100644 index 00000000000..a2409459aa7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-replication2.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml new file mode 100644 index 00000000000..8dea7914549 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-required-fields.xml @@ -0,0 +1,436 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + text + id + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml new file mode 100644 index 00000000000..15caf81c67d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest-lucene-match-version.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xml new file mode 100755 index 00000000000..a735e434bc7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-rest.xmltext + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml new file mode 100644 index 00000000000..40fc0e8e2f5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-reversed.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + one + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml new file mode 100644 index 00000000000..ca2bd788b38 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sim.xml @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + is there an echo? 
+ + + + + + + + + + + + + + + + + + + + + + + + sim1text + id + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml new file mode 100644 index 00000000000..9e0d29f3e20 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-field.xml @@ -0,0 +1,3 @@ + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml new file mode 100644 index 00000000000..bfbd3334204 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-type.xml @@ -0,0 +1,3 @@ + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl new file mode 100644 index 00000000000..fe9fd6d7a7b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-snippet-types.incl @@ -0,0 +1,19 @@ + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml new file mode 100644 index 00000000000..d1ca1f701cd --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spatial.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml new file mode 100644 index 00000000000..7124065626d --- /dev/null +++ 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-spellchecker.xml @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml new file mode 100644 index 00000000000..831539ee8be --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-stop-keep.xml @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + one + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml new file mode 100644 index 00000000000..350e2e90851 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-sweetspot.xml @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + 6.0 + 1.5 + + 3 + 5 + 0.5 + + + + + + + + 3.3 + 7.7 + 2.718281828459045 + 5.0 + + 1 + 5 + 0.2 + + + + + + + + + + + + + text + id + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml new file mode 100644 index 00000000000..0906a13bfb5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-synonym-tokenizer.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + text + id + \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml new file mode 100644 index 
00000000000..eacea9009a8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tfidf.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + text + id + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml new file mode 100644 index 00000000000..08e0aebc42f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-tiny.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + id + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml new file mode 100644 index 00000000000..1819bfa9020 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-trie.xml @@ -0,0 +1,332 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml new file mode 100644 index 00000000000..94194df6192 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema-xinclude.xml @@ -0,0 +1,30 @@ + + +]> + + + + + + &schema_entity_include; + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xml new file mode 
100644 index 00000000000..a22844de0c4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema.xmltext + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + I am your default sim + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml new file mode 100755 index 00000000000..a993cbd6f61 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema11.xml @@ -0,0 +1,387 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xml new file mode 100755 index 00000000000..506e08d787a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema12.xmltext + id + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xml new file mode 100755 index 00000000000..b05e1a7ce9e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema15.xmltext + id + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml new file mode 100644 
index 00000000000..4e49dce953e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schema_codec.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + string_f + string_f + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xml new file mode 100644 index 00000000000..04e90e33678 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/schemasurround.xmltext + id + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml new file mode 100644 index 00000000000..1fabd5c202f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-SOLR-749.xml @@ -0,0 +1,29 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml new file mode 100644 index 00000000000..9a59d90820a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-add-schema-fields-update-processor-chains.xml @@ -0,0 +1,155 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + true + managed-schema + + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + + java.lang.Double + java.lang.Float + + tdouble + + + + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + 
java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + java.lang.Number + tdouble + + + + + + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + text + + java.lang.Boolean + boolean + + + java.lang.Integer + tint + + + java.lang.Float + tfloat + + + java.util.Date + tdate + + + java.lang.Long + java.lang.Integer + tlong + + + java.lang.Number + tdouble + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml new file mode 100755 index 00000000000..3105baf5157 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-altdirectory.xml @@ -0,0 +1,26 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml new file mode 100644 index 00000000000..03963023ae1 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-basic.xml @@ -0,0 +1,29 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml new file mode 100644 index 00000000000..0de6f9412f7 --- /dev/null 
+++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-caching.xml @@ -0,0 +1,39 @@ + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml new file mode 100644 index 00000000000..b5501d85508 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-components-name.xml @@ -0,0 +1,75 @@ + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + true + + component1 + + + component2 + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + foo + + + bar + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml new file mode 100644 index 00000000000..fe39eef6a3e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-defaults.xml @@ -0,0 +1,43 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml new file mode 100644 index 00000000000..5cd0e7edf1a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy1.xml @@ -0,0 +1,51 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + + single + + + + true + 3 + 100MILLISECONDS + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml new file mode 100644 index 00000000000..9925a1e1b69 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-delpolicy2.xml @@ -0,0 +1,48 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + + single + + + value1 + value2 + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml new file mode 100644 index 00000000000..b7dc855a0c5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-elevate.xml @@ -0,0 +1,178 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + true + + 10 + + + + + + + + + + + + + + string + ${elevate.file:elevate.xml} + + + + + + string + ${elevate.data.file:elevate-data.xml} + + + + + explicit + + + elevate + + + + + + explicit + + + dataElevate + + + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml new file mode 100755 index 00000000000..1a1a4ffca62 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-functionquery.xml @@ -0,0 +1,43 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + 
+ + + + 0.0 + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml new file mode 100644 index 00000000000..7d55cc2adef --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-highlight.xml @@ -0,0 +1,60 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml new file mode 100644 index 00000000000..a54168c38cd --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml @@ -0,0 +1,79 @@ + + + + + + + LUCENE_41 + + ${solr.data.dir:} + + + + + + + + + + + + true + 20 + 20 + + true + + 1 + + + + + + + + + + + + all + text + ${solr.core.name} + ${solr.core.dataDir} + ${solr.core.config} + ${solr.core.schema} + ${solr.core.transient} + + + + + + + + + text/plain; charset=UTF-8 + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml new file mode 100644 index 00000000000..066f8632e96 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-indexconfig.xml @@ -0,0 +1,30 @@ + + + + + ${solr.data.dir:} + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + ${useCompoundFile:false} + 123 + true + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml new file mode 100644 index 00000000000..722f5e42265 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-infostream-logging.xml @@ -0,0 +1,27 @@ + + + + + ${solr.data.dir:} + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + true + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml new file mode 100644 index 00000000000..0636a1dcfac --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-lazywriter.xml @@ -0,0 +1,28 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml new file mode 100644 index 00000000000..371bfb5638d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-logmergepolicy.xml @@ -0,0 +1,37 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + -1 + -1 + -1 + + 11 + 456 + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml new file mode 100644 index 00000000000..fc49a7b1c8c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-managed-schema.xml @@ -0,0 +1,51 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${managed.schema.mutable} + managed-schema + + + + + + + ${solr.ulog.dir:} + + + + + true + + + + + true + + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml new file mode 100644 index 00000000000..9118bef45f0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master.xml @@ -0,0 +1,72 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + commit + + schema.xml,xslt/dummy.xsl + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml new file mode 100644 index 00000000000..30b4e3b7cb6 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1-keepOneBackup.xml @@ -0,0 +1,49 @@ + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + + + + commit + schema-replication2.xml:schema.xml + + 1 + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml new file mode 100644 index 00000000000..2e9885f4478 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master1.xml @@ -0,0 +1,69 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + commit + schema-replication2.xml:schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + 
+ diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml new file mode 100644 index 00000000000..21d38a3af94 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master2.xml @@ -0,0 +1,69 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + startup + schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml new file mode 100644 index 00000000000..b19073ba0ef --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-master3.xml @@ -0,0 +1,70 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + commit + startup + schema.xml + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml new file mode 100644 index 00000000000..9d2a99aff4d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-defaults.xml @@ -0,0 +1,32 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml new file mode 100644 index 00000000000..00c77ae5e78 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-mergepolicy-legacy.xml @@ -0,0 +1,31 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 7 + ${useCompoundFile:false} + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml new file mode 100644 index 00000000000..78a4eb711d3 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-minimal.xml @@ -0,0 +1,75 @@ + + + + + + + LUCENE_41 + + ${solr.data.dir:} + + + + + + + + + + + + true + 20 + 20 + + true + + 1 + + + + + + + + + + + explicit + json + true + text + + + + + + + + + text/plain; charset=UTF-8 + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml new file mode 100644 index 00000000000..ee27d0c49de --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-nocache.xml @@ -0,0 +1,41 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + ${solr.data.dir:} + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml new file mode 100644 index 00000000000..4537724b433 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-noopregen.xml @@ -0,0 +1,36 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml new file mode 100644 index 00000000000..3c41f507158 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml @@ -0,0 +1,230 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + false + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + solr.DateField + solr.TrieDateField + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + America/New_York + en_US + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + + + + + + + America/Los_Angeles + + MM/dd/yyyy + + + + + + + UTC + en_US + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd hh:mm a + yyyy-MM-dd hh:mma + yyyy-MM-dd + EEE MMM dd HH:mm:ss Z yyyy + EEE MMM dd HH:mm:ss yyyy Z + EEE MMM dd HH:mm:ss yyyy + EEE, dd MMM yyyy HH:mm:ss Z + EEEE, dd-MMM-yy HH:mm:ss Z + EEEE, MMMM dd, yyyy + MMMM dd, yyyy + MMM. 
dd, yyyy + + + + + + + UTC + fr + 'le' EEEE dd MMMM yyyy + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + false + + true + YES + on + + + false + no + oFF + + + + + + + yup + nope + + + + + + + + + + + + + + + + yyyy-MM-dd + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml new file mode 100644 index 00000000000..b4f560ed32f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml @@ -0,0 +1,272 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + suggest_wfst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.WFSTLookupFactory + suggest_wfst + false + + + true + + phrasesuggest.txt + + + + phrase_suggest + + + + + + suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingLookupFactory + suggest_analyzing + false + + + true + ja_suggest + false + + jasuggest.txt + + + + phrase_suggest + + + + + + infix_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory + false + + + text + + analyzingInfixSuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing + false + + + true + text + false + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_max_edit_2 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_max_edit_2 + 
false + + + true + text + false + 2 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + false + + + true + text + false + 4 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FuzzyLookupFactory + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + false + + + true + text + false + 2 + + fuzzysuggest.txt + + + + phrase_suggest + + + + + + + + + true + suggest_wfst + false + + true + + + suggest_wfst + + + + + + + true + suggest_analyzing + false + + true + + + suggest_analyzing + + + + + + + true + infix_suggest_analyzing + false + + true + + + infix_suggest_analyzing + + + + + + true + fuzzy_suggest_analyzing + false + + true + + + fuzzy_suggest_analyzing + + + + + + + true + fuzzy_suggest_analyzing_with_max_edit_2 + false + + true + + + fuzzy_suggest_analyzing_with_max_edit_2 + + + + + + + true + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + false + + true + + + fuzzy_suggest_analyzing_with_non_fuzzy_prefix_4 + + + + + + + true + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + false + + true + + + fuzzy_suggest_analyzing_with_min_fuzzy_length_2 + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml new file mode 100644 index 00000000000..c3d9d544e1f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml @@ -0,0 +1,34 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + false + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml new file mode 100644 index 00000000000..af6cc75112d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender-noquery.xml @@ -0,0 +1,74 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml new file mode 100644 index 00000000000..12252c06b6f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-querysender.xml @@ -0,0 +1,70 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + + + + solr 0 10 mock + rocks 0 10 mock + + + + + + + + fast_warm 0 10 + mock + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml new file mode 100644 index 00000000000..5ec8e5920b3 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-repeater.xml @@ -0,0 +1,63 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + + + + + + + + + + + + commit + schema.xml + + + http://127.0.0.1:TEST_PORT/solr/replication + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl new file mode 100644 index 
00000000000..03f236fccf7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-reqHandler.incl @@ -0,0 +1,5 @@ + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml new file mode 100644 index 00000000000..859883d52f0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-response-log-component.xml @@ -0,0 +1,54 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${solr.data.dir:} + + + + + + + + + + dismax + + + responselog + + + + + + dismax + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml new file mode 100644 index 00000000000..43fbc2873da --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-script-updateprocessor.xml @@ -0,0 +1,112 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + javascript + missleading.extension.updateprocessor.js.txt + + + + + + + + + + + + trivial.updateprocessor0.js + + true + 1 + + + + + + + + + trivial.updateprocessor0.js + trivial.updateprocessor1.js + + + true + 1 + + + + + + + + trivial.updateprocessor0.js + trivial.updateprocessor1.js + + true + 1 + + + + + + + + + conditional.updateprocessor.js + addfields.updateprocessor.js + + + + + + + conditional.updateprocessor.js + + + addfields.updateprocessor.js + + + + + + throw.error.on.add.updateprocessor.js + + + + + missing.functions.updateprocessor.js + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml new file mode 100644 index 00000000000..ac2e59ee56e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave.xml @@ -0,0 +1,61 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + true + + + + + + + + + + + + + + + + http://127.0.0.1:TEST_PORT/solr + 00:00:01 + COMPRESSION + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml new file mode 100644 index 00000000000..36d6d92e146 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-slave1.xml @@ -0,0 +1,57 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + max-age=30, public + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml new file mode 100644 index 00000000000..8c76857f32b --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-snippet-processor.xml @@ -0,0 +1,6 @@ + + + field-included + x + x_x + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml new file mode 100644 index 00000000000..3a1547f1b1c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-solcoreproperties.xml @@ -0,0 +1,35 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + 
+ + + + + + ${foo.foo1} + ${foo.foo2} + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml new file mode 100644 index 00000000000..9092a5875a8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellcheckcomponent.xml @@ -0,0 +1,178 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + ${solr.data.dir:} + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + true + + + default_teststop + default_teststop + true + teststop + + + direct + solr.DirectSolrSpellChecker + 3 + 100 + teststop + + + direct_lowerfilt + solr.DirectSolrSpellChecker + 3 + 100 + lowerfilt + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + MAX_FREQ + 10 + + + threshold + lowerfilt + spellcheckerThreshold + true + .29 + + + threshold_direct + solr.DirectSolrSpellChecker + lowerfilt + spellcheckerThreshold + true + .29 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + true + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + true + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + true + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + lowerfilt + + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml new file mode 100644 
index 00000000000..e6744cb3944 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-spellchecker.xml @@ -0,0 +1,142 @@ + + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + suggest + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.jaspell.JaspellLookup + suggest + suggest + true + + + 0.0 + + + + + + + suggest_tst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.tst.TSTLookup + suggest + suggest_tst + true + + + 0.0 + + + + + + + suggest_fst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.FSTLookup + suggest + suggest_fst + true + + + 5 + true + + + + + + + suggest_wfst + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.WFSTLookupFactory + suggest + suggest_wfst + true + + + true + + + + + + + true + suggest + true + + + suggest_jaspell + + + + + + + true + suggest_tst + true + + + suggest_tst + + + + + + + true + suggest_fst + false + + + suggest_fst + + + + + + + true + suggest_wfst + false + + + suggest_wfst + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml new file mode 100644 index 00000000000..fdca7893d92 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-test-misc.xml @@ -0,0 +1,52 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + + + + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml new file mode 100644 index 00000000000..86a79fbf8fc 
--- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tieredmergepolicy.xml @@ -0,0 +1,47 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + 7 + + 19 + 9 + 0.1 + + + ${useCompoundFile:false} + + + + 987 + 42 + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml new file mode 100644 index 00000000000..d55845c13d0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -0,0 +1,120 @@ + + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.blocksperbank:1024} + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + + + ${solr.data.dir:} + + + + + + + + + + + + + + + true + + + + + + + + + + ${solr.ulog.dir:} + + + + + + true + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + true + non_indexed_signature_sS + false + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml new file mode 100644 index 00000000000..ecaaf1146d5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-transformers.xml @@ -0,0 +1,84 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + x1 + x2 + + + + 100 + + + + x1 + x2 + + + + + xA + xA + + + + + + + + 88 + 99 + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml new file mode 100644 index 00000000000..1b99f61dc36 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml @@ -0,0 +1,464 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + solr.TrieIntField + solr.TrieLongField + + + + min_foo_l + + + max_foo_l + + + ; + + primary_author_s1 + + + + primary_author_s1 + first_foo_l + + + + + + + + + + + + + foo_t + + + + + + + + foo_t + + + + + + foo.* + bar.* + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + .*HOSS.* + + + + + + + name + foo_t + + + + + + name + foo_t + + + + + + + foo.* + bar.*_s + + + + + + nametext + text_sw + + + + + + solr.DateField + solr.StrField + + + + + + solr.DateField + solr.StrField + + foo.* + + + + + + + + + + + + + + + + + + foo.* + yak.* + + + + + + + + + + + foo_s + + + + + string + ; + + + + + + foo_s + bar_s + + + + + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + + html_s + + + + + + + trunc + 5 + + + + + + count_field + + + + + + + + + + false + + + + + + true + + + + + + foo.* + false + + + + + + foo.* + + false + + + + + + + false + + + + + + true + + + + + + .*_raw + + + + + + source1_s + dest_s + + + + + source1_s + source2_s + dest_s + + + + + + + source1_s + source2_s + + dest_s + + + + + + + source\d_.* + + source0_.* + + + dest_s + + + + + + field1 + toField + + + toField + 3 + + + + + + field1 + toField + + + field1 + + + + + + toField + + + field1 + toField + + + + + + field1 + field2 + toField + + + ; + toField + + + + + + + category + category_s + + + + authors + editors + + contributors + + + + .*_price + + list_price + + + all_prices + + + + + + 
category + category_count + + + category_count + + + category_count + 0 + + + + + + content + title + \s+ + X + + + + + + processor_default_s + X + + + processor_default_i + 42 + + + uuid + + + timestamp + + + + + + uniq_.* + + + + + + subject + title + teststop + nonexistent + ssto + sind + simple + + + + + + + subject + title + teststop + nonexistent + ssto + sind + json + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml new file mode 100644 index 00000000000..3f187f34d9d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-warmer.xml @@ -0,0 +1,46 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + ${solr.data.dir:} + + + + + + + + + ${useCompoundFile} + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + 1000 + 10000 + single + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml new file mode 100644 index 00000000000..230a1ebf2f6 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig-xinclude.xml @@ -0,0 +1,35 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml new file mode 100644 index 00000000000..055f3d7faeb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml @@ -0,0 +1,48 @@ + + + + + + + + + ${useCompoundFile:false} + + 
${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads} + ${solr.tests.ramBufferSizeMB} + + + ${solr.tests.nrtMode:true} + + 1000 + 10000 + + + ${solr.tests.lockType:single} + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..810aa1d312e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,562 @@ + + + + + + + + + + + + ${solr.data.dir:} + + + + 1000000 + 2000000 + 3000000 + 4000000 + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + 1024 + + + + + + + + + + + + true + + + + + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + true + + + + + + dismax + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + false + + + direct + DirectSolrSpellChecker + lowerfilt + 3 + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + 10 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + false + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + false + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + false + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + 
lowerfilt + + + + + + + + termsComp + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + direct + false + false + 1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + + direct + wordbreak + 20 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + + + + + + + + + + + + tvComponent + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + + + max-age=30, public + + + + + + + explicit + true + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + false + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + false + false + id + + org.apache.solr.update.processor.Lookup3Signature + + + + + + + true + non_indexed_signature_sS + false + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + uniq + uniq2 + uniq3 + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml new file mode 100644 index 00000000000..c5cc04cfe9d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_codec.xml @@ -0,0 +1,25 @@ + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml new file mode 100755 index 00000000000..172fc953f37 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/solrconfig_perf.xml @@ -0,0 +1,76 @@ + + + + + 
${tests.luceneMatchVersion:LUCENE_CURRENT} + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + true + 20 + 200 + false + 2 + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt new file mode 100644 index 00000000000..f57a4ad490f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stemdict.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# test that we can override the stemming algorithm with our own mappings +# these must be tab-separated +monkeys monkey +otters otter +# some crazy ones that a stemmer would never do +dogs cat diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt new file mode 100644 index 00000000000..8dfe80902d2 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +foo +bar \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt new file mode 100644 index 00000000000..646b7ff4ddb --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +junk +more \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt new file mode 100644 index 00000000000..1c0c6f51142 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stop-snowball.txt @@ -0,0 +1,10 @@ + | This is a file in snowball format, empty lines are ignored, '|' is a comment + | Additionally, multiple words can be on the same line, allowing stopwords to be + | arranged in tables (useful in some languages where they might inflect) + + | fictitious table below + +|third person singular +|Subject Object Possessive Reflexive +he him his himself| masculine +she her hers herself| feminine diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt new file mode 100644 index 00000000000..456348ea9dc --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-1.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt new file mode 100644 index 00000000000..d8a3810c26c --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stoptypes-2.txt @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt new file mode 100644 index 00000000000..eb5f6e1c0f8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwithbom.txt @@ -0,0 +1 @@ +BOMsAreEvil diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..b5824da3263 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt new file mode 100644 index 00000000000..0d305c88c59 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/stopwordsWrongEncoding.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# stopwords in the wrong encoding (ISO-8859-1). +# tests resourceloader's ability to report wrongly encoded files. 
+bañadores diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..b0e31cb7ec8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
// Test script for a script-based update processor. Every hook appends its own
// name to functionMessages and checks the objects it expects to find in scope.
// req, rsp, logger, params and functionMessages are free variables that this
// file does not define; they must be supplied by whatever evaluates the script.
var Assert = Packages.org.junit.Assert;

// Checks the three request-level objects every hook expects to be in scope.
function assertRequestContext() {
  Assert.assertNotNull(req);
  Assert.assertNotNull(rsp);
  Assert.assertNotNull(logger);
}

// Shared prologue of all hooks that receive a command: record the hook name,
// then check the request-level objects and the command itself.
function recordCommandHook(hookName, cmd) {
  functionMessages.add(hookName);
  assertRequestContext();
  Assert.assertNotNull(cmd);
}

// Records "processAdd0", validates params, and adds two fields to the document.
function processAdd(cmd) {
  recordCommandHook("processAdd0", cmd);
  Assert.assertNotNull(params);
  // Compared with == inside assertTrue because calling
  // assertTrue(1, params.get('intValue').intValue()) resolved to the wrong overload.
  Assert.assertTrue(1 == params.get('intValue').intValue());
  Assert.assertTrue(params.get('boolValue').booleanValue());

  // Integer.valueOf is needed here to get a true Java object, because
  // all JavaScript numbers are floating point (ie: java.lang.Double)
  cmd.getSolrInputDocument().addField("script_added_i",
                                      java.lang.Integer.valueOf(42));
  cmd.getSolrInputDocument().addField("script_added_d", 42.3);
}

// Records "processDelete0" and checks the objects in scope.
function processDelete(cmd) {
  recordCommandHook("processDelete0", cmd);
}

// Records "processMergeIndexes0" and checks the objects in scope.
function processMergeIndexes(cmd) {
  recordCommandHook("processMergeIndexes0", cmd);
}

// Records "processCommit0" and checks the objects in scope.
function processCommit(cmd) {
  recordCommandHook("processCommit0", cmd);
}

// Records "processRollback0" and checks the objects in scope.
function processRollback(cmd) {
  recordCommandHook("processRollback0", cmd);
}

// Records "finish0"; there is no command here, so only the request-level
// objects are checked.
function finish() {
  functionMessages.add("finish0");
  assertRequestContext();
}
file mode 100644 index 00000000000..7378b0802e7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/wdftypes.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# A customized type mapping for WordDelimiterFilterFactory +# the allowable types are: LOWER, UPPER, ALPHA, DIGIT, ALPHANUM, SUBWORD_DELIM +# +# the default for any character without a mapping is always computed from +# Unicode character properties + +# Map the $, %, '.', and ',' characters to DIGIT +# This might be useful for financial data. +$ => DIGIT +% => DIGIT +. 
=> DIGIT +\u002C => DIGIT + +# in some cases you might not want to split on ZWJ +# this also tests the case where we need a bigger byte[] +# see http://en.wikipedia.org/wiki/Zero-width_joiner +\u200D => ALPHANUM diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl new file mode 100644 index 00000000000..f10cfbf9330 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy-using-include.xsl @@ -0,0 +1,31 @@ + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl new file mode 100644 index 00000000000..fbbd8f745cd --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/dummy.xsl @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl new file mode 100644 index 00000000000..2e7359a62b6 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/conf/xslt/xsl-update-handler-test.xsl @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README new file mode 100644 index 00000000000..b7ca5b834f4 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/README @@ -0,0 +1,18 @@ + + +Items under this directory are used by TestConfig.testLibs() diff --git 
a/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/collection1/lib/classes/empty-file-main-lib.txt @@ -0,0 +1 @@ + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties b/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties new file mode 100644 index 00000000000..65df5e6114f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/conf/core.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+schema=schema-tiny.xml +config=solrconfig-minimal.xml +transient=true +loadOnStartup=false + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml new file mode 100644 index 00000000000..55801c4faf1 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-config.xml @@ -0,0 +1,59 @@ + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + 0 + + + + 1024 + true + 10 + + + + + + + + implicit + + + + + + + + + + + solr + solrconfig.xml schema.xml + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml new file mode 100644 index 00000000000..a2216ddfa99 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/crazy-path-to-schema.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + subject + id + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff b/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff new file mode 100644 index 00000000000..a23f9b554bd --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/external_eff @@ -0,0 +1,10 @@ +1=0.354 +2=0.975 +3=0.001 +4=100.35 +5=53.9 +6=70 +7=3.957 +8=1400 +9=24 +10=450 \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml new file mode 100644 index 00000000000..886e4434631 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-50-all.xml @@ -0,0 +1,52 @@ + + + + testAdminHandler + 11 + ${coreRootDirectory:testCoreRootDirectory} + testManagementPath + testSharedLib + ${shareSchema:testShareSchema} + 66 + + + 22 + 33 + 55 + testHost + testHostContext + ${hostPort:44} + 77 + testZkHost + + + + 
testLoggingClass + testLoggingEnabled + + 88 + 99 + + + + + ${socketTimeout:100} + ${connTimeout:110} + + + \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml new file mode 100644 index 00000000000..abb308ec997 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-multicore.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml new file mode 100644 index 00000000000..476b5bc7a10 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-no-core.xml @@ -0,0 +1,39 @@ + + + + + + ${shareSchema:false} + + + 127.0.0.1 + ${hostContext:solr} + ${hostPort:8983} + ${solr.zkclienttimeout:30000} + ${genericCoreNodeNames:true} + ${distribUpdateConnTimeout:15000} + ${distribUpdateSoTimeout:120000} + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml new file mode 100644 index 00000000000..70aaa56faa0 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler-old.xml @@ -0,0 +1,29 @@ + + + + + + + + myMagicRequiredValue + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml new file mode 100644 index 00000000000..f5d24fe931d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-shardhandler.xml @@ -0,0 +1,29 @@ + + + + + + + + myMagicRequiredValue + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml 
b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml new file mode 100644 index 00000000000..3f8b213eab5 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-new.xml @@ -0,0 +1,34 @@ + + + + + + + 127.0.0.1 + 8983 + ${hostContext:solr} + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml new file mode 100644 index 00000000000..6bc1c35e888 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr-stress-old.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt b/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt new file mode 100644 index 00000000000..2d2472e340a --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test-files/spellings.txt @@ -0,0 +1,16 @@ +foo +bar +Solr +junk +foo +bar +Solr +junk +foo +bar +Solr +junk +foo +bar +Solr +junk \ No newline at end of file diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java new file mode 100644 index 00000000000..e5e1d3cce67 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java @@ -0,0 
+1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.impl.XMLResponseParser; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.Collector; +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import 
com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; +import com.cloudera.cdk.morphline.stdlib.PipeBuilder; +import com.codahale.metrics.MetricRegistry; +import com.google.common.io.Files; +import com.typesafe.config.Config; + +public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 { + + protected Collector collector; + protected Command morphline; + protected SolrServer solrServer; + protected DocumentLoader testServer; + + protected static final boolean TEST_WITH_EMBEDDED_SOLR_SERVER = true; + protected static final String EXTERNAL_SOLR_SERVER_URL = System.getProperty("externalSolrServer"); +// protected static final String EXTERNAL_SOLR_SERVER_URL = "http://127.0.0.1:8983/solr"; + + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files"; + protected static final String DEFAULT_BASE_DIR = "solr"; + protected static final AtomicInteger SEQ_NUM = new AtomicInteger(); + protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger(); + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSolrMorphlineTestBase.class); + + protected String tempDir; + + @BeforeClass + public static void beforeClass() throws Exception { + myInitCore(DEFAULT_BASE_DIR); + } + + protected static void myInitCore(String baseDirName) throws Exception { + initCore( + RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/solrconfig.xml", + RESOURCES_DIR + "/" + baseDirName + "/collection1/conf/schema.xml", + RESOURCES_DIR + "/" + baseDirName + ); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + collector = new Collector(); + + if (EXTERNAL_SOLR_SERVER_URL != null) { + //solrServer = new ConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); + //solrServer = new 
SafeConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); + solrServer = new HttpSolrServer(EXTERNAL_SOLR_SERVER_URL); + ((HttpSolrServer)solrServer).setParser(new XMLResponseParser()); + } else { + if (TEST_WITH_EMBEDDED_SOLR_SERVER) { + solrServer = new EmbeddedTestSolrServer(h.getCoreContainer(), ""); + } else { + throw new RuntimeException("Not yet implemented"); + //solrServer = new TestSolrServer(getSolrServer()); + } + } + + int batchSize = SEQ_NUM2.incrementAndGet() % 2 == 0 ? 100 : 1; //SolrInspector.DEFAULT_SOLR_SERVER_BATCH_SIZE : 1; + testServer = new SolrServerDocumentLoader(solrServer, batchSize); + deleteAllDocuments(); + + tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + } + + @After + public void tearDown() throws Exception { + collector = null; + solrServer = null; + super.tearDown(); + } + + protected void testDocumentTypesInternal(String[] files, Map expectedRecords) throws Exception { + deleteAllDocuments(); + int numDocs = 0; + for (int i = 0; i < 1; i++) { + + for (String file : files) { + File f = new File(file); + byte[] body = Files.toByteArray(f); + Record event = new Record(); + //event.put(Fields.ID, docId++); + event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body)); + event.getFields().put(Fields.ATTACHMENT_NAME, f.getName()); + event.getFields().put(Fields.BASE_ID, f.getName()); + load(event); + Integer count = expectedRecords.get(file); + if (count != null) { + numDocs += count; + } else { + numDocs++; + } + assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*")); + } + } + assertEquals(numDocs, queryResultSetSize("*:*")); + } + + private boolean load(Record record) { + Notifications.notifyStartSession(morphline); + return morphline.process(record); + } + + protected int queryResultSetSize(String query) { +// return collector.getRecords().size(); + try { + testServer.commitTransaction(); + solrServer.commit(false, true, true); 
+ QueryResponse rsp = solrServer.query(new SolrQuery(query).setRows(Integer.MAX_VALUE)); + LOGGER.debug("rsp: {}", rsp); + int i = 0; + for (SolrDocument doc : rsp.getResults()) { + LOGGER.debug("rspDoc #{}: {}", i++, doc); + } + int size = rsp.getResults().size(); + return size; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void deleteAllDocuments() throws SolrServerException, IOException { + collector.reset(); + SolrServer s = solrServer; + s.deleteByQuery("*:*"); // delete everything! + s.commit(); + } + + + public static void setupMorphline(String tempDir, String file) throws IOException { + String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8"); + morphlineText = morphlineText.replaceAll("RESOURCES_DIR", new File(tempDir).getAbsolutePath()); + + FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8"); + } + + protected Command createMorphline(String file) throws IOException { + setupMorphline(tempDir, file); + + return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext()); + } + + private MorphlineContext createMorphlineContext() { + return new SolrMorphlineContext.Builder() + .setDocumentLoader(testServer) +// .setDocumentLoader(new CollectingDocumentLoader(100)) + .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) + .setMetricRegistry(new MetricRegistry()) + .build(); + } + + private Config parse(String file) throws IOException { + SolrLocator locator = new SolrLocator(createMorphlineContext()); + locator.setSolrHomeDir(testSolrHome + "/collection1"); + Config config = new Compiler().parse(new File(tempDir + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); + config = config.getConfigList("morphlines").get(0); + return config; + } + + protected void startSession() { + Notifications.notifyStartSession(morphline); + } + + protected void testDocumentContent(HashMap 
expectedResultMap) + throws Exception { + QueryResponse rsp = solrServer.query(new SolrQuery("*:*").setRows(Integer.MAX_VALUE)); + // Check that every expected field/values shows up in the actual query + for (Entry current : expectedResultMap.entrySet()) { + String field = current.getKey(); + for (String expectedFieldValue : current.getValue().getFieldValues()) { + ExpectedResult.CompareType compareType = current.getValue().getCompareType(); + boolean foundField = false; + + for (SolrDocument doc : rsp.getResults()) { + Collection actualFieldValues = doc.getFieldValues(field); + if (compareType == ExpectedResult.CompareType.equals) { + if (actualFieldValues != null && actualFieldValues.contains(expectedFieldValue)) { + foundField = true; + break; + } + } + else { + for (Iterator it = actualFieldValues.iterator(); it.hasNext(); ) { + String actualValue = it.next().toString(); // test only supports string comparison + if (actualFieldValues != null && actualValue.contains(expectedFieldValue)) { + foundField = true; + break; + } + } + } + } + assert(foundField); // didn't find expected field/value in query + } + } + } + + /** + * Representation of the expected output of a SolrQuery. + */ + protected static class ExpectedResult { + private HashSet fieldValues; + public enum CompareType { + equals, // Compare with equals, i.e. actual.equals(expected) + contains; // Compare with contains, i.e. 
actual.contains(expected) + } + private CompareType compareType; + + public ExpectedResult(HashSet fieldValues, CompareType compareType) { + this.fieldValues = fieldValues; + this.compareType = compareType; + } + public HashSet getFieldValues() { return fieldValues; } + public CompareType getCompareType() { return compareType; } + } +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java new file mode 100644 index 00000000000..62cf325d5a7 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.Collector; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Notifications; +import com.cloudera.cdk.morphline.stdlib.PipeBuilder; +import com.codahale.metrics.MetricRegistry; +import com.google.common.collect.ListMultimap; +import com.typesafe.config.Config; + +public abstract class AbstractSolrMorphlineZkTestBase extends AbstractFullDistribZkTestBase { + private static final File solrHomeDirectory = new File(TEMP_DIR, AbstractSolrMorphlineZkTestBase.class.getName()); + + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files"; + private static final File SOLR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr"); + private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1"); + + protected Collector collector; + protected Command morphline; + + @Override + public String getSolrHome() { + return solrHomeDirectory.getPath(); + } + + public AbstractSolrMorphlineZkTestBase() { + fixShardCount = true; + sliceCount = 3; + shardCount = 3; + } + + @BeforeClass + public static 
void setupClass() throws Exception { + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(SOLR_INSTANCE_DIR, solrHomeDirectory); + createTempDir(); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + System.setProperty("host", "127.0.0.1"); + System.setProperty("numShards", Integer.toString(sliceCount)); + uploadConfFiles(); + collector = new Collector(); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + System.clearProperty("host"); + System.clearProperty("numShards"); + } + + @Test + @Override + public void testDistribSearch() throws Exception { + super.testDistribSearch(); + } + + @Override + protected void commit() throws Exception { + Notifications.notifyCommitTransaction(morphline); + super.commit(); + } + + protected Command parse(String file) throws IOException { + return parse(file, "collection1"); + } + + protected Command parse(String file, String collection) throws IOException { + SolrLocator locator = new SolrLocator(createMorphlineContext()); + locator.setCollectionName(collection); + locator.setZkHost(zkServer.getZkAddress()); + //locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented + //locator.setSolrHomeDir(SOLR_HOME_DIR.getPath()); + Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); + config = config.getConfigList("morphlines").get(0); + return createMorphline(config); + } + + private Command createMorphline(Config config) { + return new PipeBuilder().build(config, null, collector, createMorphlineContext()); + } + + private MorphlineContext createMorphlineContext() { + return new MorphlineContext.Builder() + .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) + .setMetricRegistry(new MetricRegistry()) + .build(); + } + + protected void startSession() { + 
Notifications.notifyStartSession(morphline); + } + + protected ListMultimap next(Iterator iter) { + SolrDocument doc = iter.next(); + Record record = toRecord(doc); + record.removeAll("_version_"); // the values of this field are unknown and internal to solr + return record.getFields(); + } + + private Record toRecord(SolrDocument doc) { + Record record = new Record(); + for (String key : doc.keySet()) { + record.getFields().replaceValues(key, doc.getFieldValues(key)); + } + return record; + } + + @Override + public JettySolrRunner createJetty(File solrHome, String dataDir, + String shardList, String solrConfigOverride, String schemaOverride) + throws Exception { + + JettySolrRunner jetty = new JettySolrRunner(solrHome.getAbsolutePath(), + context, 0, solrConfigOverride, schemaOverride); + + jetty.setShards(shardList); + + if (System.getProperty("collection") == null) { + System.setProperty("collection", "collection1"); + } + + jetty.start(); + + System.clearProperty("collection"); + + return jetty; + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String name) throws Exception { + putConfig(zkClient, solrhome, name, name); + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String srcName, String destName) + throws Exception { + + File file = new File(solrhome, "conf" + File.separator + srcName); + if (!file.exists()) { + // LOG.info("skipping " + file.getAbsolutePath() + + // " because it doesn't exist"); + return; + } + + String destPath = "/configs/conf1/" + destName; + // LOG.info("put " + file.getAbsolutePath() + " to " + destPath); + zkClient.makePath(destPath, file, false, true); + } + + private void uploadConfFiles() throws Exception { + // upload our own config files + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000); + putConfig(zkClient, SOLR_CONF_DIR, "solrconfig.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "schema.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "elevate.xml"); + 
putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_en.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ar.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_bg.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ca.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_cz.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_da.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_el.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_es.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_eu.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_de.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fa.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fi.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_fr.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ga.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_gl.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hi.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hu.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_hy.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_id.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_it.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ja.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_lv.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_nl.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_no.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_pt.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ro.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_ru.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_sv.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_th.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/stopwords_tr.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ca.txt"); + putConfig(zkClient, SOLR_CONF_DIR, 
"lang/contractions_fr.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_ga.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "lang/contractions_it.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/stemdict_nl.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "lang/hyphenations_ga.txt"); + + putConfig(zkClient, SOLR_CONF_DIR, "stopwords.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "protwords.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "currency.xml"); + putConfig(zkClient, SOLR_CONF_DIR, "open-exchange-rates.json"); + putConfig(zkClient, SOLR_CONF_DIR, "mapping-ISOLatin1Accent.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "old_synonyms.txt"); + putConfig(zkClient, SOLR_CONF_DIR, "synonyms.txt"); + zkClient.close(); + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java new file mode 100644 index 00000000000..ed58cffff6e --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A mockup DocumentLoader implementation for unit tests; collects all documents into a main memory list. + */ +class CollectingDocumentLoader implements DocumentLoader { + + private final int batchSize; + private final List batch = new ArrayList (); + private List results = new ArrayList (); + + private static final Logger LOGGER = LoggerFactory.getLogger(CollectingDocumentLoader.class); + + public CollectingDocumentLoader(int batchSize) { + if (batchSize <= 0) { + throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize); + } + this.batchSize = batchSize; + } + + @Override + public void beginTransaction() { + LOGGER.trace("beginTransaction"); + batch.clear(); + } + + @Override + public void load(SolrInputDocument doc) { + LOGGER.trace("load doc: {}", doc); + batch.add(doc); + if (batch.size() >= batchSize) { + loadBatch(); + } + } + + @Override + public void commitTransaction() { + LOGGER.trace("commitTransaction"); + if (batch.size() > 0) { + loadBatch(); + } + } + + private void loadBatch() { + try { + results.addAll(batch); + } finally { + batch.clear(); + } + } + + @Override + public UpdateResponse rollbackTransaction() { + LOGGER.trace("rollback"); + return new UpdateResponse(); + } + + @Override + public void shutdown() { + LOGGER.trace("shutdown"); + } + + @Override + public SolrPingResponse ping() { + LOGGER.trace("ping"); + return new SolrPingResponse(); + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java new 
file mode 100644 index 00000000000..1f747f3d2d8 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.core.CoreContainer; + +/** + * An EmbeddedSolrServer that supresses shutdown and rollback requests as + * necessary for testing + */ +public class EmbeddedTestSolrServer extends EmbeddedSolrServer { + + public EmbeddedTestSolrServer(CoreContainer coreContainer, String coreName) { + super(coreContainer, coreName); + } + + @Override + public void shutdown() { + ; // NOP + } + + @Override + public UpdateResponse rollback() throws SolrServerException, IOException { + return new UpdateResponse(); + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java 
b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java new file mode 100644 index 00000000000..126eef34979 --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.util.Arrays; + +import org.junit.Test; + +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; + +public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase { + + @Test + public void testLoadSolrBasic() throws Exception { + //System.setProperty("ENV_SOLR_HOME", testSolrHome + "/collection1"); + morphline = createMorphline("test-morphlines/loadSolrBasic"); + //System.clearProperty("ENV_SOLR_HOME"); + Record record = new Record(); + record.put(Fields.ID, "id0"); + record.put("first_name", "Nadja"); // will be sanitized + startSession(); + Notifications.notifyBeginTransaction(morphline); + assertTrue(morphline.process(record)); + assertEquals(1, collector.getNumStartEvents()); + Notifications.notifyCommitTransaction(morphline); + Record expected = new Record(); + expected.put(Fields.ID, "id0"); + assertEquals(Arrays.asList(expected), collector.getRecords()); + assertEquals(1, queryResultSetSize("*:*")); + Notifications.notifyRollbackTransaction(morphline); + Notifications.notifyShutdown(morphline); + } + + @Test + public void testTokenizeText() throws Exception { + morphline = createMorphline("test-morphlines/tokenizeText"); + Record record = new Record(); + record.put(Fields.MESSAGE, "Hello World!"); + record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123"); + Record expected = record.copy(); + expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123")); + startSession(); + Notifications.notifyBeginTransaction(morphline); + assertTrue(morphline.process(record)); + assertEquals(1, collector.getNumStartEvents()); + Notifications.notifyCommitTransaction(morphline); + assertEquals(expected, collector.getFirstRecord()); + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java 
b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java new file mode 100644 index 00000000000..2fce297b34d --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.CollectionParams.CollectionAction; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; + +@ThreadLeakAction({Action.WARN}) +@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@SuppressCodecs({"Lucene3x", "Lucene40"}) +@Slow +public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { + + @Override + public void doTest() throws Exception { + + waitForRecoveriesToFinish(false); + + createAlias("aliascollection", "collection1"); + + morphline = parse("test-morphlines/loadSolrBasic", "aliascollection"); + Record record = new Record(); + record.put(Fields.ID, "id0-innsbruck"); + record.put("text", "mytext"); + record.put("user_screen_name", 
"foo"); + record.put("first_name", "Nadja"); // will be sanitized + startSession(); + assertEquals(1, collector.getNumStartEvents()); + Notifications.notifyBeginTransaction(morphline); + assertTrue(morphline.process(record)); + + record = new Record(); + record.put(Fields.ID, "id1-innsbruck"); + record.put("text", "mytext1"); + record.put("user_screen_name", "foo1"); + record.put("first_name", "Nadja1"); // will be sanitized + assertTrue(morphline.process(record)); + + Record expected = new Record(); + expected.put(Fields.ID, "id0-innsbruck"); + expected.put("text", "mytext"); + expected.put("user_screen_name", "foo"); + Iterator citer = collector.getRecords().iterator(); + assertEquals(expected, citer.next()); + + Record expected2 = new Record(); + expected2.put(Fields.ID, "id1-innsbruck"); + expected2.put("text", "mytext1"); + expected2.put("user_screen_name", "foo1"); + assertEquals(expected2, citer.next()); + + assertFalse(citer.hasNext()); + + commit(); + + QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); + //System.out.println(rsp); + Iterator iter = rsp.getResults().iterator(); + assertEquals(expected.getFields(), next(iter)); + assertEquals(expected2.getFields(), next(iter)); + assertFalse(iter.hasNext()); + + Notifications.notifyRollbackTransaction(morphline); + Notifications.notifyShutdown(morphline); + + + createAlias("aliascollection", "collection1,collection2"); + + try { + parse("test-morphlines/loadSolrBasic", "aliascollection"); + fail("Expected IAE because update alias maps to multiple collections"); + } catch (IllegalArgumentException e) { + + } + + cloudClient.shutdown(); + } + + private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("collections", collections); + params.set("name", alias); + params.set("action", CollectionAction.CREATEALIAS.toString()); + 
QueryRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + return cloudClient.request(request); + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java new file mode 100644 index 00000000000..4e082cc260f --- /dev/null +++ b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.morphlines.solr; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + +import org.apache.avro.Schema.Field; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.FileReader; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; +import com.google.common.base.Preconditions; +import com.google.common.io.Files; + +@ThreadLeakAction({Action.WARN}) +@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@SuppressCodecs({"Lucene3x", "Lucene40"}) +@Slow +public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { + + @Override + public void doTest() throws Exception { + File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro"); + + waitForRecoveriesToFinish(false); + + // load avro records via morphline and zk into 
solr + morphline = parse("test-morphlines/tutorialReadAvroContainer"); + Record record = new Record(); + byte[] body = Files.toByteArray(file); + record.put(Fields.ATTACHMENT_BODY, body); + startSession(); + Notifications.notifyBeginTransaction(morphline); + assertTrue(morphline.process(record)); + assertEquals(1, collector.getNumStartEvents()); + + commit(); + + // fetch sorted result set from solr + QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc)); + assertEquals(2104, collector.getRecords().size()); + assertEquals(collector.getRecords().size(), rsp.getResults().size()); + + Collections.sort(collector.getRecords(), new Comparator() { + @Override + public int compare(Record r1, Record r2) { + return r1.get("id").toString().compareTo(r2.get("id").toString()); + } + }); + + // fetch test input data and sort like solr result set + List records = new ArrayList(); + FileReader reader = new DataFileReader(file, new GenericDatumReader()); + while (reader.hasNext()) { + GenericData.Record expected = reader.next(); + records.add(expected); + } + assertEquals(collector.getRecords().size(), records.size()); + Collections.sort(records, new Comparator() { + @Override + public int compare(GenericData.Record r1, GenericData.Record r2) { + return r1.get("id").toString().compareTo(r2.get("id").toString()); + } + }); + + Object lastId = null; + for (int i = 0; i < records.size(); i++) { + //System.out.println("myrec" + i + ":" + records.get(i)); + Object id = records.get(i); + if (id != null && id.equals(lastId)) { + throw new IllegalStateException("Detected duplicate id. 
Test input data must not contain duplicate ids!"); + } + lastId = id; + } + + for (int i = 0; i < records.size(); i++) { + //System.out.println("myrsp" + i + ":" + rsp.getResults().get(i)); + } + + Iterator rspIter = rsp.getResults().iterator(); + for (int i = 0; i < records.size(); i++) { + // verify morphline spat out expected data + Record actual = collector.getRecords().get(i); + GenericData.Record expected = records.get(i); + Preconditions.checkNotNull(expected); + assertTweetEquals(expected, actual, i); + + // verify Solr result set contains expected data + actual = new Record(); + actual.getFields().putAll(next(rspIter)); + assertTweetEquals(expected, actual, i); + } + + Notifications.notifyRollbackTransaction(morphline); + Notifications.notifyShutdown(morphline); + cloudClient.shutdown(); + } + + private void assertTweetEquals(GenericData.Record expected, Record actual, int i) { + Preconditions.checkNotNull(expected); + Preconditions.checkNotNull(actual); +// System.out.println("\n\nexpected: " + toString(expected)); +// System.out.println("actual: " + actual); + String[] fieldNames = new String[] { + "id", + "in_reply_to_status_id", + "in_reply_to_user_id", + "retweet_count", + "text", + }; + for (String fieldName : fieldNames) { + assertEquals( + i + " fieldName: " + fieldName, + expected.get(fieldName).toString(), + actual.getFirstValue(fieldName).toString()); + } + } + + private String toString(GenericData.Record avroRecord) { + Record record = new Record(); + for (Field field : avroRecord.getSchema().getFields()) { + record.put(field.name(), avroRecord.get(field.pos())); + } + return record.toString(); // prints sorted by key for human readability + } + +} diff --git a/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java new file mode 100644 index 00000000000..0537c2e23ab --- /dev/null +++ 
b/solr/contrib/solr-morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.morphlines.solr; + +import java.util.Iterator; + +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Notifications; + +@ThreadLeakAction({Action.WARN}) 
+@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@SuppressCodecs({"Lucene3x", "Lucene40"}) +@Slow +public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { + + @Override + public void doTest() throws Exception { + + waitForRecoveriesToFinish(false); + + morphline = parse("test-morphlines/loadSolrBasic"); + Record record = new Record(); + record.put(Fields.ID, "id0-innsbruck"); + record.put("text", "mytext"); + record.put("user_screen_name", "foo"); + record.put("first_name", "Nadja"); // will be sanitized + startSession(); + assertEquals(1, collector.getNumStartEvents()); + Notifications.notifyBeginTransaction(morphline); + assertTrue(morphline.process(record)); + + record = new Record(); + record.put(Fields.ID, "id1-innsbruck"); + record.put("text", "mytext1"); + record.put("user_screen_name", "foo1"); + record.put("first_name", "Nadja1"); // will be sanitized + assertTrue(morphline.process(record)); + + Record expected = new Record(); + expected.put(Fields.ID, "id0-innsbruck"); + expected.put("text", "mytext"); + expected.put("user_screen_name", "foo"); + Iterator citer = collector.getRecords().iterator(); + assertEquals(expected, citer.next()); + + Record expected2 = new Record(); + expected2.put(Fields.ID, "id1-innsbruck"); + expected2.put("text", "mytext1"); + expected2.put("user_screen_name", "foo1"); + assertEquals(expected2, citer.next()); + + assertFalse(citer.hasNext()); + + commit(); + + QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); + //System.out.println(rsp); + Iterator iter = rsp.getResults().iterator(); + assertEquals(expected.getFields(), next(iter)); + assertEquals(expected2.getFields(), next(iter)); + assertFalse(iter.hasNext()); + + Notifications.notifyRollbackTransaction(morphline); + Notifications.notifyShutdown(morphline); + cloudClient.shutdown(); + } + +} diff --git 
a/solr/contrib/solr-mr/build.xml b/solr/contrib/solr-mr/build.xml new file mode 100644 index 00000000000..d9f1f72a26b --- /dev/null +++ b/solr/contrib/solr-mr/build.xml @@ -0,0 +1,147 @@ + + + + + + + + Solr map-reduce index construction. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/ivy.xml b/solr/contrib/solr-mr/ivy.xml new file mode 100644 index 00000000000..d51fd3b020e --- /dev/null +++ b/solr/contrib/solr-mr/ivy.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml b/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml new file mode 100644 index 00000000000..1640b6ff72e --- /dev/null +++ b/solr/contrib/solr-mr/src/java/assembly/hadoop-job.xml @@ -0,0 +1,39 @@ + + + + + + job + + jar + + false + + + false + runtime + lib + + ${groupId}:${artifactId} + + + + true + + ${groupId}:${artifactId} + + + + diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java new file mode 100644 index 00000000000..6b650b6cc7b --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/BatchWriter.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Enables adding batches of documents to an EmbeddedSolrServer. + */ +class BatchWriter { + + private final EmbeddedSolrServer solr; + private volatile Exception batchWriteException = null; + + private static final Logger LOG = LoggerFactory.getLogger(BatchWriter.class); + + public Exception getBatchWriteException() { + return batchWriteException; + } + + public void setBatchWriteException(Exception batchWriteException) { + this.batchWriteException = batchWriteException; + } + + /** The number of writing threads. 
*/ + final int writerThreads; + + /** Queue Size */ + final int queueSize; + + private final ThreadPoolExecutor batchPool; + + private TaskID taskId = null; + + /** + * The number of in progress batches, must be zero before the close can + * actually start closing + */ + AtomicInteger executingBatches = new AtomicInteger(0); + + /** + * Create the batch writer object, set the thread to daemon mode, and start + * it. + * + */ + + final class Batch implements Runnable { + + private List documents; + private UpdateResponse result; + + public Batch(Collection batch) { + documents = new ArrayList(batch); + } + + public void run() { + try { + executingBatches.getAndIncrement(); + result = runUpdate(documents); + } finally { + executingBatches.getAndDecrement(); + } + } + + protected List getDocuments() { + return documents; + } + + protected void setDocuments(List documents) { + this.documents = documents; + } + + protected UpdateResponse getResult() { + return result; + } + + protected void setResult(UpdateResponse result) { + this.result = result; + } + + protected void reset(List documents) { + if (this.documents == null) { + this.documents = new ArrayList(documents); + } else { + this.documents.clear(); + this.documents.addAll(documents); + } + result = null; + } + + protected void reset(SolrInputDocument document) { + if (this.documents == null) { + this.documents = new ArrayList(); + } else { + this.documents.clear(); + } + this.documents.add(document); + result = null; + } + } + + protected UpdateResponse runUpdate(List batchToWrite) { + try { + UpdateResponse result = solr.add(batchToWrite); + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1); + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size()); + if (LOG.isDebugEnabled()) { + SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), 
SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime()); + } + return result; + } catch (Throwable e) { + if (e instanceof Exception) { + setBatchWriteException((Exception) e); + } else { + setBatchWriteException(new Exception(e)); + } + SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1); + LOG.error("Unable to process batch", e); + return null; + } + } + + + public BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid, + int writerThreads, int queueSize) { + this.solr = solr; + this.writerThreads = writerThreads; + this.queueSize = queueSize; + taskId = tid; + + // we need to obtain the settings before the constructor + if (writerThreads != 0) { + batchPool = new ThreadPoolExecutor(writerThreads, writerThreads, 5, + TimeUnit.SECONDS, new LinkedBlockingQueue(queueSize), + new ThreadPoolExecutor.CallerRunsPolicy()); + } else { // single threaded case + batchPool = null; + } + } + + public void queueBatch(Collection batch) + throws IOException, SolrServerException { + + throwIf(); + Batch b = new Batch(batch); + if (batchPool != null) { + batchPool.execute(b); + } else { // single threaded case + b.run(); + throwIf(); + } + } + + public synchronized void close(TaskAttemptContext context) + throws InterruptedException, SolrServerException, IOException { + + if (batchPool != null) { + context.setStatus("Waiting for batches to complete"); + batchPool.shutdown(); + + while (!batchPool.isTerminated()) { + LOG.info(String.format(Locale.ENGLISH, + "Waiting for %d items and %d threads to finish executing", batchPool + .getQueue().size(), batchPool.getActiveCount())); + batchPool.awaitTermination(5, TimeUnit.SECONDS); + } + } + //reporter.setStatus("Committing Solr"); + //solr.commit(true, false); + context.setStatus("Optimizing Solr"); + int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1); + LOG.info("Optimizing Solr: forcing merge down to {} 
segments", maxSegments); + long start = System.currentTimeMillis(); + solr.optimize(true, false, maxSegments); + context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs); + context.setStatus("Shutting down Solr"); + // TODO is core close needed? - according to TestEmbeddedSolrServer it's not... + //core.close(); + solr.shutdown(); + } + + /** + * Throw a legal exception if a previous batch write had an exception. The + * previous state is cleared. Uses {@link #batchWriteException} for the state + * from the last exception. + * + * This will loose individual exceptions if the exceptions happen rapidly. + * + * @throws IOException On low level IO error + * @throws SolrServerException On Solr Exception + */ + private void throwIf() throws IOException, SolrServerException { + + final Exception last = batchWriteException; + batchWriteException = null; + + if (last == null) { + return; + } + if (last instanceof SolrServerException) { + throw (SolrServerException) last; + } + if (last instanceof IOException) { + throw (IOException) last; + } + throw new IOException("Batch Write Failure", last); + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java new file mode 100644 index 00000000000..33f609f1f2d --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataInputInputStream.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.hadoop; + +import java.io.DataInput; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * An InputStream that wraps a DataInput. + * @see DataOutputOutputStream + */ +@InterfaceAudience.Private +public class DataInputInputStream extends InputStream { + + private DataInput in; + + /** + * Construct an InputStream from the given DataInput. If 'in' + * is already an InputStream, simply returns it. Otherwise, wraps + * it in an InputStream. 
+ * @param in the DataInput to wrap + * @return an InputStream instance that reads from 'in' + */ + public static InputStream constructInputStream(DataInput in) { + if (in instanceof InputStream) { + return (InputStream)in; + } else { + return new DataInputInputStream(in); + } + } + + + public DataInputInputStream(DataInput in) { + this.in = in; + } + + @Override + public int read() throws IOException { + return in.readUnsignedByte(); + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java new file mode 100644 index 00000000000..389c52a577d --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * OutputStream implementation that wraps a DataOutput. 
+ */ +@InterfaceAudience.Private +public class DataOutputOutputStream extends OutputStream { + + private final DataOutput out; + + /** + * Construct an OutputStream from the given DataOutput. If 'out' + * is already an OutputStream, simply returns it. Otherwise, wraps + * it in an OutputStream. + * @param out the DataOutput to wrap + * @return an OutputStream instance that outputs to 'out' + */ + public static OutputStream constructOutputStream(DataOutput out) { + if (out instanceof OutputStream) { + return (OutputStream)out; + } else { + return new DataOutputOutputStream(out); + } + } + + private DataOutputOutputStream(DataOutput out) { + this.out = out; + } + + @Override + public void write(int b) throws IOException { + out.writeByte(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java new file mode 100644 index 00000000000..bacf1d0e1fc --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.morphlines.solr.DocumentLoader; + +/** + * Prints documents to stdout instead of loading them into Solr for quicker turnaround during early + * trial & debug sessions. + */ +final class DryRunDocumentLoader implements DocumentLoader { + + @Override + public void beginTransaction() { + } + + @Override + public void load(SolrInputDocument doc) { + System.out.println("dryrun: " + doc); + } + + @Override + public void commitTransaction() { + } + + @Override + public UpdateResponse rollbackTransaction() { + return new UpdateResponse(); + } + + @Override + public void shutdown() { + } + + @Override + public SolrPingResponse ping() { + return new SolrPingResponse(); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java new file mode 100644 index 00000000000..a7e4f7dda9d --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/GoLive.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.request.CoreAdminRequest; +import org.apache.solr.hadoop.MapReduceIndexerTool.Options; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The optional (parallel) GoLive phase merges the output shards of the previous + * phase into a set of live customer facing Solr servers, typically a SolrCloud. 
+ */ +class GoLive { + + private static final Logger LOG = LoggerFactory.getLogger(GoLive.class); + + // TODO: handle clusters with replicas + public boolean goLive(Options options, FileStatus[] outDirs) { + LOG.info("Live merging of output shards into Solr cluster..."); + boolean success = false; + long start = System.currentTimeMillis(); + int concurrentMerges = options.goLiveThreads; + ThreadPoolExecutor executor = new ThreadPoolExecutor(concurrentMerges, + concurrentMerges, 1, TimeUnit.SECONDS, + new LinkedBlockingQueue()); + + try { + CompletionService completionService = new ExecutorCompletionService(executor); + Set> pending = new HashSet>(); + int cnt = -1; + for (final FileStatus dir : outDirs) { + + LOG.debug("processing: " + dir.getPath()); + + cnt++; + List urls = options.shardUrls.get(cnt); + + for (String url : urls) { + + String baseUrl = url; + if (baseUrl.endsWith("/")) { + baseUrl = baseUrl.substring(0, baseUrl.length() - 1); + } + + int lastPathIndex = baseUrl.lastIndexOf("/"); + if (lastPathIndex == -1) { + LOG.error("Found unexpected shardurl, live merge failed: " + baseUrl); + return false; + } + + final String name = baseUrl.substring(lastPathIndex + 1); + baseUrl = baseUrl.substring(0, lastPathIndex); + final String mergeUrl = baseUrl; + + Callable task = new Callable() { + @Override + public Request call() { + Request req = new Request(); + LOG.info("Live merge " + dir.getPath() + " into " + mergeUrl); + final HttpSolrServer server = new HttpSolrServer(mergeUrl); + try { + CoreAdminRequest.MergeIndexes mergeRequest = new CoreAdminRequest.MergeIndexes(); + mergeRequest.setCoreName(name); + mergeRequest.setIndexDirs(Arrays.asList(dir.getPath().toString() + "/data/index")); + try { + mergeRequest.process(server); + req.success = true; + } catch (SolrServerException e) { + req.e = e; + return req; + } catch (IOException e) { + req.e = e; + return req; + } + } finally { + server.shutdown(); + } + return req; + } + }; + 
pending.add(completionService.submit(task)); + } + } + + while (pending != null && pending.size() > 0) { + try { + Future future = completionService.take(); + if (future == null) break; + pending.remove(future); + + try { + Request req = future.get(); + + if (!req.success) { + // failed + LOG.error("A live merge command failed", req.e); + return false; + } + + } catch (ExecutionException e) { + LOG.error("Error sending live merge command", e); + return false; + } + + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Live merge process interrupted", e); + return false; + } + } + + cnt = -1; + + + try { + LOG.info("Committing live merge..."); + if (options.zkHost != null) { + CloudSolrServer server = new CloudSolrServer(options.zkHost); + server.setDefaultCollection(options.collection); + server.commit(); + server.shutdown(); + } else { + for (List urls : options.shardUrls) { + for (String url : urls) { + // TODO: we should do these concurrently + HttpSolrServer server = new HttpSolrServer(url); + server.commit(); + server.shutdown(); + } + } + } + LOG.info("Done committing live merge"); + } catch (Exception e) { + LOG.error("Error sending commits to live Solr cluster", e); + return false; + } + + success = true; + return true; + } finally { + shutdownNowAndAwaitTermination(executor); + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Live merging of index shards into Solr cluster took " + secs + " secs"); + if (success) { + LOG.info("Live merging completed successfully"); + } else { + LOG.info("Live merging failed"); + } + } + + // if an output dir does not exist, we should fail and do no merge? 
+ } + + private void shutdownNowAndAwaitTermination(ExecutorService pool) { + pool.shutdown(); // Disable new tasks from being submitted + pool.shutdownNow(); // Cancel currently executing tasks + boolean shutdown = false; + while (!shutdown) { + try { + // Wait a while for existing tasks to terminate + shutdown = pool.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException ie) { + // Preserve interrupt status + Thread.currentThread().interrupt(); + } + if (!shutdown) { + pool.shutdownNow(); // Cancel currently executing tasks + } + } + } + + + private static final class Request { + Exception e; + boolean success = false; + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java new file mode 100644 index 00000000000..c9eaef6c9e9 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + + +/** + * Solr field names for metadata of an HDFS file. 
+ */ +public interface HdfsFileFieldNames { + + public static final String FILE_UPLOAD_URL = "file_upload_url"; + public static final String FILE_DOWNLOAD_URL = "file_download_url"; + public static final String FILE_SCHEME = "file_scheme"; + public static final String FILE_HOST = "file_host"; + public static final String FILE_PORT = "file_port"; + public static final String FILE_PATH = "file_path"; + public static final String FILE_NAME = "file_name"; + public static final String FILE_LENGTH = "file_length"; + public static final String FILE_LAST_MODIFIED = "file_last_modified"; + public static final String FILE_OWNER = "file_owner"; + public static final String FILE_GROUP = "file_group"; + public static final String FILE_PERMISSIONS_USER = "file_permissions_user"; + public static final String FILE_PERMISSIONS_GROUP = "file_permissions_group"; + public static final String FILE_PERMISSIONS_OTHER = "file_permissions_other"; + public static final String FILE_PERMISSIONS_STICKYBIT = "file_permissions_stickybit"; + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java new file mode 100644 index 00000000000..229235b96b6 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/HeartBeater.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.Locale; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class runs a background thread that once every 60 seconds checks to see if + * a progress report is needed. If a report is needed it is issued. + * + * A simple counter {@link #threadsNeedingHeartBeat} handles the number of + * threads requesting a heart beat. + * + * The expected usage pattern is + * + *
+ *  try {
+ *       heartBeater.needHeartBeat();
+ *       do something that may take a while
+ *    } finally {
+ *       heartBeater.cancelHeartBeat();
+ *    }
+ * 
+ * + * + */ +public class HeartBeater extends Thread { + + public static Logger LOG = LoggerFactory.getLogger(HeartBeater.class); + + /** + * count of threads asking for heart beat, at 0 no heart beat done. This could + * be an atomic long but then missmatches in need/cancel could result in + * negative counts. + */ + private volatile int threadsNeedingHeartBeat = 0; + + private Progressable progress; + + /** + * The amount of time to wait between checks for the need to issue a heart + * beat. In milliseconds. + */ + private final long waitTimeMs = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); + + private final CountDownLatch isClosing = new CountDownLatch(1); + + /** + * Create the heart beat object thread set it to daemon priority and start the + * thread. When the count in {@link #threadsNeedingHeartBeat} is positive, the + * heart beat will be issued on the progress object every 60 seconds. + */ + public HeartBeater(Progressable progress) { + setDaemon(true); + this.progress = progress; + LOG.info("Heart beat reporting class is " + progress.getClass().getName()); + start(); + } + + public Progressable getProgress() { + return progress; + } + + public void setProgress(Progressable progress) { + this.progress = progress; + } + + @Override + public void run() { + LOG.info("HeartBeat thread running"); + while (true) { + try { + synchronized (this) { + if (threadsNeedingHeartBeat > 0) { + progress.progress(); + if (LOG.isInfoEnabled()) { + LOG.info(String.format(Locale.ENGLISH, "Issuing heart beat for %d threads", + threadsNeedingHeartBeat)); + } + } else { + if (LOG.isInfoEnabled()) { + LOG.info(String.format(Locale.ENGLISH, "heartbeat skipped count %d", + threadsNeedingHeartBeat)); + } + } + } + if (isClosing.await(waitTimeMs, TimeUnit.MILLISECONDS)) { + return; + } + } catch (Throwable e) { + LOG.error("HeartBeat throwable", e); + } + } + } + + /** + * inform the background thread that heartbeats are to be issued. 
Issue a + * heart beat also + */ + public synchronized void needHeartBeat() { + threadsNeedingHeartBeat++; + // Issue a progress report right away, + // just in case the the cancel comes before the background thread issues a + // report. + // If enough cases like this happen the 600 second timeout can occur + progress.progress(); + if (threadsNeedingHeartBeat == 1) { + // this.notify(); // wake up the heartbeater + } + } + + /** + * inform the background thread that this heartbeat request is not needed. + * This must be called at some point after each {@link #needHeartBeat()} + * request. + */ + public synchronized void cancelHeartBeat() { + if (threadsNeedingHeartBeat > 0) { + threadsNeedingHeartBeat--; + } else { + Exception e = new Exception("Dummy"); + e.fillInStackTrace(); + LOG.warn("extra call to cancelHeartBeat", e); + } + } + + public void setStatus(String status) { + if (progress instanceof TaskInputOutputContext) { + ((TaskInputOutputContext) progress).setStatus(status); + } + } + + /** Releases any resources */ + public void close() { + isClosing.countDown(); + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java new file mode 100644 index 00000000000..5d65fa306df --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MR Mapper that randomizing a list of URLs. + * + * Mapper input is (offset, URL) pairs. Each such pair indicates a file to + * index. + * + * Mapper output is (randomPosition, URL) pairs. The reducer receives these + * pairs sorted by randomPosition. + */ +public class LineRandomizerMapper extends Mapper { + + private Random random; + + private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerMapper.class); + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + random = createRandom(context); + } + + @Override + protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + LOGGER.debug("map key: {}, value: {}", key, value); + context.write(new LongWritable(random.nextLong()), value); + } + + private Random createRandom(Context context) { + long taskId = 0; + if (context.getTaskAttemptID() != null) { // MRUnit returns null + LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId()); + LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}", context.getTaskAttemptID().getTaskID().getId()); + taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N + } + // create a good 
random seed, yet ensure deterministic PRNG sequence for easy reproducability + return new Random(421439783L * (taskId + 1)); + } + +} \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java new file mode 100644 index 00000000000..af7759e9f90 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MR Reducer that randomizing a list of URLs. + * + * Reducer input is (randomPosition, URL) pairs. Each such pair indicates a file + * to index. + * + * Reducer output is a list of URLs, each URL in a random position. 
+ */ +public class LineRandomizerReducer extends Reducer<LongWritable, Text, Text, NullWritable> { + + private static final Logger LOGGER = LoggerFactory.getLogger(LineRandomizerReducer.class); + /** Writes each URL of the group as an output key paired with NullWritable; the random key is only logged. */ + @Override + protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { + for (Text value : values) { + LOGGER.debug("reduce key: {}, value: {}", key, value); + context.write(value, NullWritable.get()); + } + } +} \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java new file mode 100644 index 00000000000..e0e3e62709f --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java @@ -0,0 +1,1300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.impl.Arguments; +import net.sourceforge.argparse4j.impl.action.HelpArgumentAction; +import net.sourceforge.argparse4j.impl.choice.RangeArgumentChoice; +import net.sourceforge.argparse4j.impl.type.FileArgumentType; +import net.sourceforge.argparse4j.inf.Argument; +import net.sourceforge.argparse4j.inf.ArgumentGroup; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.FeatureControl; +import net.sourceforge.argparse4j.inf.Namespace; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; +import 
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.log4j.PropertyConfigurator; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.hadoop.morphline.MorphlineMapRunner; +import org.apache.solr.hadoop.morphline.MorphlineMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.base.Fields; + + +/** + * Public API for a MapReduce batch job driver that creates a set of Solr index shards from a set of + * input files and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. + * Also supports merging the output shards into a set of live customer facing Solr servers, + * typically a SolrCloud. + */ +public class MapReduceIndexerTool extends Configured implements Tool { + + Job job; // visible for testing only + + public static final String RESULTS_DIR = "results"; + + static final String MAIN_MEMORY_RANDOMIZATION_THRESHOLD = + MapReduceIndexerTool.class.getName() + ".mainMemoryRandomizationThreshold"; + + private static final String FULL_INPUT_LIST = "full-input-list.txt"; + + private static final Logger LOG = LoggerFactory.getLogger(MapReduceIndexerTool.class); + + + /** + * See http://argparse4j.sourceforge.net and for details see http://argparse4j.sourceforge.net/usage.html + */ + static final class MyArgumentParser { + + /** + * Parses the given command line arguments. + * + * @return exitCode null indicates the caller shall proceed with processing, + * non-null indicates the caller shall exit the program with the + * given exit status code. 
+ */ + public Integer parseArgs(String[] args, Configuration conf, Options opts) { + assert args != null; + assert conf != null; + assert opts != null; + + if (args.length == 0) { + args = new String[] { "--help" }; + } + + ArgumentParser parser = ArgumentParsers + .newArgumentParser("hadoop [GenericOptions]... jar search-mr-*-job.jar " + MapReduceIndexerTool.class.getName(), false) + .defaultHelp(true) + .description( + "MapReduce batch job driver that takes a morphline and creates a set of Solr index shards from a set of input files " + + "and writes the indexes into HDFS, in a flexible, scalable and fault-tolerant manner. " + + "It also supports merging the output shards into a set of live customer facing Solr servers, " + + "typically a SolrCloud. The program proceeds in several consecutive MapReduce based phases, as follows:" + + "\n\n" + + "1) Randomization phase: This (parallel) phase randomizes the list of input files in order to spread " + + "indexing load more evenly among the mappers of the subsequent phase." + + "\n\n" + + "2) Mapper phase: This (parallel) phase takes the input files, extracts the relevant content, transforms it " + + "and hands SolrInputDocuments to a set of reducers. " + + "The ETL functionality is flexible and " + + "customizable using chains of arbitrary morphline commands that pipe records from one transformation command to another. " + + "Commands to parse and transform a set of standard data formats such as Avro, CSV, Text, HTML, XML, " + + "PDF, Word, Excel, etc. are provided out of the box, and additional custom commands and parsers for additional " + + "file or data formats can be added as morphline plugins. " + + "This is done by implementing a simple Java interface that consumes a record (e.g. a file in the form of an InputStream " + + "plus some headers plus contextual metadata) and generates as output zero or more records. 
" + + "Any kind of data format can be indexed and any Solr documents for any kind of Solr schema can be generated, " + + "and any custom ETL logic can be registered and executed.\n" + + "Record fields, including MIME types, can also explicitly be passed by force from the CLI to the morphline, for example: " + + "hadoop ... -D " + MorphlineMapRunner.MORPHLINE_FIELD_PREFIX + Fields.ATTACHMENT_MIME_TYPE + "=text/csv" + + "\n\n" + + "3) Reducer phase: This (parallel) phase loads the mapper's SolrInputDocuments into one EmbeddedSolrServer per reducer. " + + "Each such reducer and Solr server can be seen as a (micro) shard. The Solr servers store their " + + "data in HDFS." + + "\n\n" + + "4) Mapper-only merge phase: This (parallel) phase merges the set of reducer shards into the number of solr " + + "shards expected by the user, using a mapper-only job. This phase is omitted if the number " + + "of shards is already equal to the number of shards expected by the user. " + + "\n\n" + + "5) Go-live phase: This optional (parallel) phase merges the output shards of the previous phase into a set of " + + "live customer facing Solr servers, typically a SolrCloud. " + + "If this phase is omitted you can explicitly point each Solr server to one of the HDFS output shard directories." + + "\n\n" + + "Fault Tolerance: Mapper and reducer task attempts are retried on failure per the standard MapReduce semantics. " + + "On program startup all data in the --output-dir is deleted if that output directory already exists. " + + "If the whole job fails you can retry simply by rerunning the program again using the same arguments." 
+ ); + + parser.addArgument("--help", "-help", "-h") + .help("Show this help message and exit") + .action(new HelpArgumentAction() { + @Override + public void run(ArgumentParser parser, Argument arg, Map attrs, String flag, Object value) throws ArgumentParserException { + try { + parser.printHelp(new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"))); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("Won't Happen for UTF-8"); + } + System.out.println(); + System.out.print(ToolRunnerHelpFormatter.getGenericCommandUsage()); + //ToolRunner.printGenericCommandUsage(System.out); + System.out.println( + "Examples: \n\n" + + + "# (Re)index an Avro based Twitter tweet file:\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shards 1 \\\n" + + " hdfs:///user/$USER/test-documents/sample-statuses-20120906-141433.avro\n" + + "\n" + + "# (Re)index all files that match all of the following conditions:\n" + + "# 1) File is contained in dir tree hdfs:///user/$USER/solrloadtest/twitter/tweets\n" + + "# 2) file name matches the glob pattern 'sample-statuses*.gz'\n" + + "# 3) file was last modified less than 100000 minutes ago\n" + + "# 4) file size is between 1 MB and 1 GB\n" + + "# Also include extra library jar file containing JSON tweet Java parser:\n" + + "hadoop jar target/search-mr-*-job.jar " + "com.cloudera.cdk.morphline.hadoop.find.HdfsFindTool" + " \\\n" + + " -find hdfs:///user/$USER/solrloadtest/twitter/tweets \\\n" 
+ + " -type f \\\n" + + " -name 'sample-statuses*.gz' \\\n" + + " -mmin -1000000 \\\n" + + " -size -100000000c \\\n" + + " -size +1000000c \\\n" + + "| sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadJsonTestTweets.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shards 100 \\\n" + + " --input-list -\n" + + "\n" + + "# Go live by merging resulting index shards into a live Solr cluster\n" + + "# (explicitly specify Solr URLs - for a SolrCloud cluster see next example):\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --solr-home-dir src/test/resources/solr/minimr \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --shard-url http://solr001.mycompany.com:8983/solr/collection1 \\\n" + + " --shard-url http://solr002.mycompany.com:8983/solr/collection1 \\\n" + + " --go-live \\\n" + + " hdfs:///user/foo/indir\n" + + "\n" + + "# Go live by merging resulting index shards into a live SolrCloud cluster\n" + + "# (discover shards and Solr URLs through ZooKeeper):\n" + + "sudo -u hdfs hadoop \\\n" + + " --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" + + " jar target/search-mr-*-job.jar " + 
MapReduceIndexerTool.class.getName() + " \\\n" + + " -D 'mapred.child.java.opts=-Xmx500m' \\\n" + +// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" + + " --log4j src/test/resources/log4j.properties \\\n" + + " --morphline-file ../search-core/src/test/resources/test-morphlines/tutorialReadAvroContainer.conf \\\n" + + " --output-dir hdfs://c2202.mycompany.com/user/$USER/test \\\n" + + " --zk-host zk01.mycompany.com:2181/solr \\\n" + + " --collection collection1 \\\n" + + " --go-live \\\n" + + " hdfs:///user/foo/indir\n" + ); + throw new FoundHelpArgument(); // Trick to prevent processing of any remaining arguments + } + }); + + ArgumentGroup requiredGroup = parser.addArgumentGroup("Required arguments"); + + Argument outputDirArg = requiredGroup.addArgument("--output-dir") + .metavar("HDFS_URI") + .type(new PathArgumentType(conf) { + @Override + public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + Path path = super.convert(parser, arg, value); + if ("hdfs".equals(path.toUri().getScheme()) && path.toUri().getAuthority() == null) { + // TODO: consider defaulting to hadoop's fs.default.name here or in SolrRecordWriter.createEmbeddedSolrServer() + throw new ArgumentParserException("Missing authority in path URI: " + path, parser); + } + return path; + } + }.verifyHasScheme().verifyIsAbsolute().verifyCanWriteParent()) + .required(true) + .help("HDFS directory to write Solr indexes to. Inside there one output directory per shard will be generated. " + + "Example: hdfs://c2202.mycompany.com/user/$USER/test"); + + Argument inputListArg = parser.addArgument("--input-list") + .action(Arguments.append()) + .metavar("URI") + // .type(new PathArgumentType(fs).verifyExists().verifyCanRead()) + .type(Path.class) + .help("Local URI or HDFS URI of a UTF-8 encoded file containing a list of HDFS URIs to index, " + + "one URI per line in the file. If '-' is specified, URIs are read from the standard input. 
" + + "Multiple --input-list arguments can be specified."); + + Argument morphlineFileArg = requiredGroup.addArgument("--morphline-file") + .metavar("FILE") + .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()) + .required(true) + .help("Relative or absolute path to a local config file that contains one or more morphlines. " + + "The file must be UTF-8 encoded. Example: /path/to/morphline.conf"); + + Argument morphlineIdArg = parser.addArgument("--morphline-id") + .metavar("STRING") + .type(String.class) + .help("The identifier of the morphline that shall be executed within the morphline config file " + + "specified by --morphline-file. If the --morphline-id option is ommitted the first (i.e. " + + "top-most) morphline within the config file is used. Example: morphline1"); + + Argument solrHomeDirArg = parser.addArgument("--solr-home-dir") + .metavar("DIR") + .type(new FileArgumentType() { + @Override + public File convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + File solrHomeDir = super.convert(parser, arg, value); + File solrConfigFile = new File(new File(solrHomeDir, "conf"), "solrconfig.xml"); + new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead().convert( + parser, arg, solrConfigFile.getPath()); + return solrHomeDir; + } + }.verifyIsDirectory().verifyCanRead()) + .required(false) + .help("Relative or absolute path to a local dir containing Solr conf/ dir and in particular " + + "conf/solrconfig.xml and optionally also lib/ dir. This directory will be uploaded to each MR task. " + + "Example: src/test/resources/solr/minimr"); + + Argument updateConflictResolverArg = parser.addArgument("--update-conflict-resolver") + .metavar("FQCN") + .type(String.class) + .setDefault(RetainMostRecentUpdateConflictResolver.class.getName()) + .help("Fully qualified class name of a Java class that implements the UpdateConflictResolver interface. 
" + + "This enables deduplication and ordering of a series of document updates for the same unique document " + + "key. For example, a MapReduce batch job might index multiple files in the same job where some of the " + + "files contain old and new versions of the very same document, using the same unique document key.\n" + + "Typically, implementations of this interface forbid collisions by throwing an exception, or ignore all but " + + "the most recent document version, or, in the general case, order colliding updates ascending from least " + + "recent to most recent (partial) update. The caller of this interface (i.e. the Hadoop Reducer) will then " + + "apply the updates to Solr in the order returned by the orderUpdates() method.\n" + + "The default RetainMostRecentUpdateConflictResolver implementation ignores all but the most recent document " + + "version, based on a configurable numeric Solr field, which defaults to the file_last_modified timestamp"); + + Argument mappersArg = parser.addArgument("--mappers") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer + .setDefault(-1) + .help("Tuning knob that indicates the maximum number of MR mapper tasks to use. -1 indicates use all map slots " + + "available on the cluster."); + + Argument reducersArg = parser.addArgument("--reducers") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(-1, Integer.MAX_VALUE)) // TODO: also support X% syntax where X is an integer + .setDefault(-1) + .help("Tuning knob that indicates the number of reducers to index into. " + + "-1 indicates use all reduce slots available on the cluster. " + + "0 indicates use one reducer per output shard, which disables the mtree merge MR algorithm. 
" + + "The mtree merge MR algorithm improves scalability by spreading load " + + "(in particular CPU load) among a number of parallel reducers that can be much larger than the number " + + "of solr shards expected by the user. It can be seen as an extension of concurrent lucene merges " + + "and tiered lucene merges to the clustered case. The subsequent mapper-only phase " + + "merges the output of said large number of reducers to the number of shards expected by the user, " + + "again by utilizing more available parallelism on the cluster."); + + Argument fanoutArg = parser.addArgument("--fanout") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(2, Integer.MAX_VALUE)) + .setDefault(Integer.MAX_VALUE) + .help(FeatureControl.SUPPRESS); + + Argument maxSegmentsArg = parser.addArgument("--max-segments") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .setDefault(1) + .help("Tuning knob that indicates the maximum number of segments to be contained on output in the index of " + + "each reducer shard. After a reducer has built its output index it applies a merge policy to merge segments " + + "until there are <= maxSegments lucene segments left in this index. " + + "Merging segments involves reading and rewriting all data in all these segment files, " + + "potentially multiple times, which is very I/O intensive and time consuming. " + + "However, an index with fewer segments can later be merged faster, " + + "and it can later be queried faster once deployed to a live Solr serving shard. " + + "Set maxSegments to 1 to optimize the index for low query latency. " + + "In a nutshell, a small maxSegments value trades indexing latency for subsequently improved query latency. 
" + + "This can be a reasonable trade-off for batch indexing systems."); + + Argument fairSchedulerPoolArg = parser.addArgument("--fair-scheduler-pool") + .metavar("STRING") + .help("Optional tuning knob that indicates the name of the fair scheduler pool to submit jobs to. " + + "The Fair Scheduler is a pluggable MapReduce scheduler that provides a way to share large clusters. " + + "Fair scheduling is a method of assigning resources to jobs such that all jobs get, on average, an " + + "equal share of resources over time. When there is a single job running, that job uses the entire " + + "cluster. When other jobs are submitted, tasks slots that free up are assigned to the new jobs, so " + + "that each job gets roughly the same amount of CPU time. Unlike the default Hadoop scheduler, which " + + "forms a queue of jobs, this lets short jobs finish in reasonable time while not starving long jobs. " + + "It is also an easy way to share a cluster between multiple of users. Fair sharing can also work with " + + "job priorities - the priorities are used as weights to determine the fraction of total compute time " + + "that each job gets."); + + Argument dryRunArg = parser.addArgument("--dry-run") + .action(Arguments.storeTrue()) + .help("Run in local mode and print documents to stdout instead of loading them into Solr. This executes " + + "the morphline in the client process (without submitting a job to MR) for quicker turnaround during " + + "early trial & debug sessions."); + + Argument log4jConfigFileArg = parser.addArgument("--log4j") + .metavar("FILE") + .type(new FileArgumentType().verifyExists().verifyIsFile().verifyCanRead()) + .help("Relative or absolute path to a log4j.properties config file on the local file system. This file " + + "will be uploaded to each MR task. 
Example: /path/to/log4j.properties"); + + Argument verboseArg = parser.addArgument("--verbose", "-v") + .action(Arguments.storeTrue()) + .help("Turn on verbose output."); + + ArgumentGroup clusterInfoGroup = parser + .addArgumentGroup("Cluster arguments") + .description( + "Arguments that provide information about your Solr cluster. " + + "If you are not using --go-live, pass the --shards argument. If you are building shards for " + + "a Non-SolrCloud cluster, pass the --shard-url argument one or more times. To build indexes for" + + " a replicated cluster with --shard-url, pass replica urls consecutively and also pass --shards. " + + "If you are building shards for a SolrCloud cluster, pass the --zk-host argument. " + + "Using --go-live requires either --shard-url or --zk-host."); + + Argument shardUrlsArg = clusterInfoGroup.addArgument("--shard-url") + .metavar("URL") + .type(String.class) + .action(Arguments.append()) + .help("Solr URL to merge resulting shard into if using --go-live. " + + "Example: http://solr001.mycompany.com:8983/solr/collection1. " + + "Multiple --shard-url arguments can be specified, one for each desired shard. " + + "If you are merging shards into a SolrCloud cluster, use --zk-host instead."); + + Argument zkHostArg = clusterInfoGroup.addArgument("--zk-host") + .metavar("STRING") + .type(String.class) + .help("The address of a ZooKeeper ensemble being used by a SolrCloud cluster. " + + "This ZooKeeper ensemble will be examined to determine the number of output " + + "shards to create as well as the Solr URLs to merge the output shards into when using the --go-live option. " + + "Requires that you also pass the --collection to merge the shards into.\n" + + "\n" + + "The --zk-host option implements the same partitioning semantics as the standard SolrCloud " + + "Near-Real-Time (NRT) API. 
This enables to mix batch updates from MapReduce ingestion with " + + "updates from standard Solr NRT ingestion on the same SolrCloud cluster, " + + "using identical unique document keys.\n" + + "\n" + + "Format is: a list of comma separated host:port pairs, each corresponding to a zk " + + "server. Example: '127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183' If " + + "the optional chroot suffix is used the example would look " + + "like: '127.0.0.1:2181/solr,127.0.0.1:2182/solr,127.0.0.1:2183/solr' " + + "where the client would be rooted at '/solr' and all paths " + + "would be relative to this root - i.e. getting/setting/etc... " + + "'/foo/bar' would result in operations being run on " + + "'/solr/foo/bar' (from the server perspective).\n" + + "\n" + + "If --solr-home-dir is not specified, the Solr home directory for the collection " + + "will be downloaded from this ZooKeeper ensemble."); + + Argument shardsArg = clusterInfoGroup.addArgument("--shards") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .help("Number of output shards to generate."); + + ArgumentGroup goLiveGroup = parser.addArgumentGroup("Go live arguments") + .description("Arguments for merging the shards that are built into a live Solr cluster. " + + "Also see the Cluster arguments."); + + Argument goLiveArg = goLiveGroup.addArgument("--go-live") + .action(Arguments.storeTrue()) + .help("Allows you to optionally merge the final index shards into a live Solr cluster after they are built. " + + "You can pass the ZooKeeper address with --zk-host and the relevant cluster information will be auto detected. " + + "If you are not using a SolrCloud cluster, --shard-url arguments can be used to specify each SolrCore to merge " + + "each shard into."); + + Argument collectionArg = goLiveGroup.addArgument("--collection") + .metavar("STRING") + .help("The SolrCloud collection to merge shards into when using --go-live and --zk-host. 
Example: collection1"); + + Argument goLiveThreadsArg = goLiveGroup.addArgument("--go-live-threads") + .metavar("INTEGER") + .type(Integer.class) + .choices(new RangeArgumentChoice(1, Integer.MAX_VALUE)) + .setDefault(1000) + .help("Tuning knob that indicates the maximum number of live merges to run in parallel at one time."); + + // trailing positional arguments + Argument inputFilesArg = parser.addArgument("input-files") + .metavar("HDFS_URI") + .type(new PathArgumentType(conf).verifyHasScheme().verifyExists().verifyCanRead()) + .nargs("*") + .setDefault() + .help("HDFS URI of file or directory tree to index."); + + Namespace ns; + try { + ns = parser.parseArgs(args); + } catch (FoundHelpArgument e) { + return 0; + } catch (ArgumentParserException e) { + parser.handleError(e); + return 1; + } + + opts.log4jConfigFile = (File) ns.get(log4jConfigFileArg.getDest()); + if (opts.log4jConfigFile != null) { + PropertyConfigurator.configure(opts.log4jConfigFile.getPath()); + } + LOG.debug("Parsed command line args: {}", ns); + + opts.inputLists = ns.getList(inputListArg.getDest()); + if (opts.inputLists == null) { + opts.inputLists = Collections.EMPTY_LIST; + } + opts.inputFiles = ns.getList(inputFilesArg.getDest()); + opts.outputDir = (Path) ns.get(outputDirArg.getDest()); + opts.mappers = ns.getInt(mappersArg.getDest()); + opts.reducers = ns.getInt(reducersArg.getDest()); + opts.updateConflictResolver = ns.getString(updateConflictResolverArg.getDest()); + opts.fanout = ns.getInt(fanoutArg.getDest()); + opts.maxSegments = ns.getInt(maxSegmentsArg.getDest()); + opts.morphlineFile = (File) ns.get(morphlineFileArg.getDest()); + opts.morphlineId = ns.getString(morphlineIdArg.getDest()); + opts.solrHomeDir = (File) ns.get(solrHomeDirArg.getDest()); + opts.fairSchedulerPool = ns.getString(fairSchedulerPoolArg.getDest()); + opts.isDryRun = ns.getBoolean(dryRunArg.getDest()); + opts.isVerbose = ns.getBoolean(verboseArg.getDest()); + opts.zkHost = 
ns.getString(zkHostArg.getDest()); + opts.shards = ns.getInt(shardsArg.getDest()); + opts.shardUrls = buildShardUrls(ns.getList(shardUrlsArg.getDest()), opts.shards); + opts.goLive = ns.getBoolean(goLiveArg.getDest()); + opts.goLiveThreads = ns.getInt(goLiveThreadsArg.getDest()); + opts.collection = ns.getString(collectionArg.getDest()); + + try { + verifyGoLiveArgs(opts, parser); + } catch (ArgumentParserException e) { + parser.handleError(e); + return 1; + } + + if (opts.inputLists.isEmpty() && opts.inputFiles.isEmpty()) { + LOG.info("No input files specified - nothing to process"); + return 0; // nothing to process + } + return null; + } + + /** Marker trick to prevent processing of any remaining arguments once --help option has been parsed */ + private static final class FoundHelpArgument extends RuntimeException { + } + } + // END OF INNER CLASS + /** Splits the flat URL list into consecutive groups of ceil(urls.size() / numShards) URLs, one group per shard; a null numShards means one URL per shard, and null urls yields null. */ + static List<List<String>> buildShardUrls(List<?> urls, Integer numShards) { + if (urls == null) return null; + List<List<String>> shardUrls = new ArrayList<List<String>>(urls.size()); + List<String> list = null; + + int sz; + if (numShards == null) { + numShards = urls.size(); + } + sz = (int) Math.ceil(urls.size() / (float)numShards); + for (int i = 0; i < urls.size(); i++) { + if (i % sz == 0) { + list = new ArrayList<String>(); + shardUrls.add(list); + } + list.add((String) urls.get(i)); + } + + return shardUrls; + } + /** Holder for the parsed command line options that MyArgumentParser.parseArgs() fills in. */ + static final class Options { + boolean goLive; + String collection; + String zkHost; + Integer goLiveThreads; + List<List<String>> shardUrls; + List<Path> inputLists; + List<Path> inputFiles; + Path outputDir; + int mappers; + int reducers; + String updateConflictResolver; + int fanout; + Integer shards; + int maxSegments; + File morphlineFile; + String morphlineId; + File solrHomeDir; + String fairSchedulerPool; + boolean isDryRun; + File log4jConfigFile; + boolean isVerbose; + } + // END OF INNER CLASS + + + /** API for command line clients */ + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new 
MapReduceIndexerTool(), args); + System.exit(res); + } + + public MapReduceIndexerTool() {} + + @Override + public int run(String[] args) throws Exception { + Options opts = new Options(); + Integer exitCode = new MyArgumentParser().parseArgs(args, getConf(), opts); + if (exitCode != null) { + return exitCode; + } + return run(opts); + } + + /** API for Java clients; visible for testing; may become a public API eventually */ + int run(Options options) throws Exception { + + if ("local".equals(getConf().get("mapred.job.tracker"))) { + throw new IllegalStateException( + "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported " + + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, " + + "which is required for passing files via --files and --libjars"); + } + + long programStartTime = System.currentTimeMillis(); + if (options.fairSchedulerPool != null) { + getConf().set("mapred.fairscheduler.pool", options.fairSchedulerPool); + } + getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments); + + // switch off a false warning about allegedly not implementing Tool + // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html + // also see https://issues.apache.org/jira/browse/HADOOP-8183 + getConf().setBoolean("mapred.used.genericoptionsparser", true); + + if (options.log4jConfigFile != null) { + Utils.setLogConfigFile(options.log4jConfigFile, getConf()); + addDistributedCacheFile(options.log4jConfigFile, getConf()); + } + + job = Job.getInstance(getConf()); + job.setJarByClass(getClass()); + + if (options.morphlineFile == null) { + throw new ArgumentParserException("Argument --morphline-file is required", null); + } + verifyGoLiveArgs(options, null); + verifyZKStructure(options, null); + + int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1 + //int mappers = 
job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only + LOG.info("Cluster reports {} mapper slots", mappers); + + if (options.mappers == -1) { + mappers = 8 * mappers; // better accomodate stragglers + } else { + mappers = options.mappers; + } + if (mappers <= 0) { + throw new IllegalStateException("Illegal number of mappers: " + mappers); + } + options.mappers = mappers; + + FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration()); + if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) { + return -1; + } + Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR); + Path outputReduceDir = new Path(options.outputDir, "reducers"); + Path outputStep1Dir = new Path(options.outputDir, "tmp1"); + Path outputStep2Dir = new Path(options.outputDir, "tmp2"); + Path outputTreeMergeStep = new Path(options.outputDir, "mtree-merge-output"); + Path fullInputList = new Path(outputStep1Dir, FULL_INPUT_LIST); + + LOG.debug("Creating list of input files for mappers: {}", fullInputList); + long numFiles = addInputFiles(options.inputFiles, options.inputLists, fullInputList, job.getConfiguration()); + if (numFiles == 0) { + LOG.info("No input files found - nothing to process"); + return 0; + } + int numLinesPerSplit = (int) ceilDivide(numFiles, mappers); + if (numLinesPerSplit < 0) { // numeric overflow from downcasting long to int? 
+ numLinesPerSplit = Integer.MAX_VALUE; + } + numLinesPerSplit = Math.max(1, numLinesPerSplit); + + int realMappers = Math.min(mappers, (int) ceilDivide(numFiles, numLinesPerSplit)); + calculateNumReducers(options, realMappers); + int reducers = options.reducers; + LOG.info("Using these parameters: " + + "numFiles: {}, mappers: {}, realMappers: {}, reducers: {}, shards: {}, fanout: {}, maxSegments: {}", + new Object[] {numFiles, mappers, realMappers, reducers, options.shards, options.fanout, options.maxSegments}); + + + LOG.info("Randomizing list of {} input files to spread indexing load more evenly among mappers", numFiles); + long startTime = System.currentTimeMillis(); + if (numFiles < job.getConfiguration().getInt(MAIN_MEMORY_RANDOMIZATION_THRESHOLD, 100001)) { + // If there are few input files reduce latency by directly running main memory randomization + // instead of launching a high latency MapReduce job + randomizeFewInputFiles(fs, outputStep2Dir, fullInputList); + } else { + // Randomize using a MapReduce job. Use sequential algorithm below a certain threshold because there's no + // benefit in using many parallel mapper tasks just to randomize the order of a few lines each + int numLinesPerRandomizerSplit = Math.max(10 * 1000 * 1000, numLinesPerSplit); + Job randomizerJob = randomizeManyInputFiles(getConf(), fullInputList, outputStep2Dir, numLinesPerRandomizerSplit); + if (!waitForCompletion(randomizerJob, options.isVerbose)) { + return -1; // job failed + } + } + float secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. 
Randomizing list of {} input files took {} secs", numFiles, secs); + + + job.setInputFormatClass(NLineInputFormat.class); + NLineInputFormat.addInputPath(job, outputStep2Dir); + NLineInputFormat.setNumLinesPerSplit(job, numLinesPerSplit); + FileOutputFormat.setOutputPath(job, outputReduceDir); + + String mapperClass = job.getConfiguration().get(JobContext.MAP_CLASS_ATTR); + if (mapperClass == null) { // enable customization + Class clazz = MorphlineMapper.class; + mapperClass = clazz.getName(); + job.setMapperClass(clazz); + } + job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(mapperClass)); + + if (job.getConfiguration().get(JobContext.REDUCE_CLASS_ATTR) == null) { // enable customization + job.setReducerClass(SolrReducer.class); + } + if (options.updateConflictResolver == null) { + throw new IllegalArgumentException("updateConflictResolver must not be null"); + } + job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver); + + if (options.zkHost != null) { + assert options.collection != null; + /* + * MapReduce partitioner that partitions the Mapper output such that each + * SolrInputDocument gets sent to the SolrCloud shard that it would have + * been sent to if the document were ingested via the standard SolrCloud + * Near Real Time (NRT) API. + * + * In other words, this class implements the same partitioning semantics + * as the standard SolrCloud NRT API. This enables to mix batch updates + * from MapReduce ingestion with updates from standard NRT ingestion on + * the same SolrCloud cluster, using identical unique document keys. 
+ */ + if (job.getConfiguration().get(JobContext.PARTITIONER_CLASS_ATTR) == null) { // enable customization + job.setPartitionerClass(SolrCloudPartitioner.class); + } + job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost); + job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection); + } + job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards); + + job.setOutputFormatClass(SolrOutputFormat.class); + if (options.solrHomeDir != null) { + SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job); + } else { + assert options.zkHost != null; + // use the config that this collection uses for the SolrHomeCache. + ZooKeeperInspector zki = new ZooKeeperInspector(); + SolrZkClient zkClient = zki.getZkClient(options.zkHost); + try { + String configName = zki.readConfigName(zkClient, options.collection); + File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName); + SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job); + options.solrHomeDir = tmpSolrHomeDir; + } finally { + zkClient.close(); + } + } + + MorphlineMapRunner runner = setupMorphline(options); + if (options.isDryRun && runner != null) { + LOG.info("Indexing {} files in dryrun mode", numFiles); + startTime = System.currentTimeMillis(); + dryRun(runner, fs, fullInputList); + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. 
Indexing {} files in dryrun mode took {} secs", numFiles, secs); + goodbye(null, programStartTime); + return 0; + } + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName()); + + job.setNumReduceTasks(reducers); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(SolrInputDocumentWritable.class); + LOG.info("Indexing {} files using {} real mappers into {} reducers", new Object[] {numFiles, realMappers, reducers}); + startTime = System.currentTimeMillis(); + if (!waitForCompletion(job, true)) { + return -1; // job failed + } + + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("Done. Indexing {} files using {} real mappers into {} reducers took {} secs", new Object[] {numFiles, realMappers, reducers, secs}); + + int mtreeMergeIterations = 0; + if (reducers > options.shards) { + mtreeMergeIterations = (int) Math.round(log(options.fanout, reducers / options.shards)); + } + LOG.debug("MTree merge iterations to do: {}", mtreeMergeIterations); + int mtreeMergeIteration = 1; + while (reducers > options.shards) { // run a mtree merge iteration + job = Job.getInstance(getConf()); + job.setJarByClass(getClass()); + job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(TreeMergeMapper.class)); + job.setMapperClass(TreeMergeMapper.class); + job.setOutputFormatClass(TreeMergeOutputFormat.class); + job.setNumReduceTasks(0); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(NullWritable.class); + job.setInputFormatClass(NLineInputFormat.class); + + Path inputStepDir = new Path(options.outputDir, "mtree-merge-input-iteration" + mtreeMergeIteration); + fullInputList = new Path(inputStepDir, FULL_INPUT_LIST); + LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}", new Object[] {mtreeMergeIteration, mtreeMergeIterations, fullInputList}); + numFiles = createTreeMergeInputDirList(outputReduceDir, fs, fullInputList); + if (numFiles != reducers) { + throw 
new IllegalStateException("Not same reducers: " + reducers + ", numFiles: " + numFiles); + } + NLineInputFormat.addInputPath(job, fullInputList); + NLineInputFormat.setNumLinesPerSplit(job, options.fanout); + FileOutputFormat.setOutputPath(job, outputTreeMergeStep); + + LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}", new Object[] { + mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout}); + startTime = System.currentTimeMillis(); + if (!waitForCompletion(job, options.isVerbose)) { + return -1; // job failed + } + secs = (System.currentTimeMillis() - startTime) / 1000.0f; + LOG.info("MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs", + new Object[] {mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout), options.fanout, secs}); + + if (!delete(outputReduceDir, true, fs)) { + return -1; + } + if (!rename(outputTreeMergeStep, outputReduceDir, fs)) { + return -1; + } + assert reducers % options.fanout == 0; + reducers = reducers / options.fanout; + mtreeMergeIteration++; + } + assert reducers == options.shards; + + // normalize output shard dir prefix, i.e. 
+ // rename part-r-00000 to part-00000 (stems from zero tree merge iterations) + // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations) + for (FileStatus stats : fs.listStatus(outputReduceDir)) { + String dirPrefix = SolrOutputFormat.getOutputName(job); + Path srcPath = stats.getPath(); + if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) { + String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length()); + Path dstPath = new Path(srcPath.getParent(), dstName); + if (!rename(srcPath, dstPath, fs)) { + return -1; + } + } + }; + + // publish results dir + if (!rename(outputReduceDir, outputResultsDir, fs)) { + return -1; + } + + if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(outputResultsDir, fs))) { + return -1; + } + + goodbye(job, programStartTime); + return 0; + } + + private void calculateNumReducers(Options options, int realMappers) throws IOException { + if (options.shards <= 0) { + throw new IllegalStateException("Illegal number of shards: " + options.shards); + } + if (options.fanout <= 1) { + throw new IllegalStateException("Illegal fanout: " + options.fanout); + } + if (realMappers <= 0) { + throw new IllegalStateException("Illegal realMappers: " + realMappers); + } + + + int reducers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxReduceTasks(); // MR1 + //reducers = job.getCluster().getClusterStatus().getReduceSlotCapacity(); // Yarn only + LOG.info("Cluster reports {} reduce slots", reducers); + + if (options.reducers == 0) { + reducers = options.shards; + } else if (options.reducers == -1) { + reducers = Math.min(reducers, realMappers); // no need to use many reducers when using few mappers + } else { + reducers = options.reducers; + } + reducers = Math.max(reducers, options.shards); + + if (reducers != options.shards) { + // Ensure fanout isn't misconfigured. 
fanout can't meaningfully be larger than what would be + // required to merge all leaf shards in one single tree merge iteration into root shards + options.fanout = Math.min(options.fanout, (int) ceilDivide(reducers, options.shards)); + + // Ensure invariant reducers == options.shards * (fanout ^ N) where N is an integer >= 1. + // N is the number of mtree merge iterations. + // This helps to evenly spread docs among root shards and simplifies the impl of the mtree merge algorithm. + int s = options.shards; + while (s < reducers) { + s = s * options.fanout; + } + reducers = s; + assert reducers % options.fanout == 0; + } + options.reducers = reducers; + } + + private long addInputFiles(List inputFiles, List inputLists, Path fullInputList, Configuration conf) + throws IOException { + + long numFiles = 0; + FileSystem fs = fullInputList.getFileSystem(conf); + FSDataOutputStream out = fs.create(fullInputList); + try { + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + + for (Path inputFile : inputFiles) { + FileSystem inputFileFs = inputFile.getFileSystem(conf); + if (inputFileFs.exists(inputFile)) { + PathFilter pathFilter = new PathFilter() { + @Override + public boolean accept(Path path) { + return !path.getName().startsWith("."); // ignore "hidden" files and dirs + } + }; + numFiles += addInputFilesRecursively(inputFile, writer, inputFileFs, pathFilter); + } + } + + for (Path inputList : inputLists) { + InputStream in; + if (inputList.toString().equals("-")) { + in = System.in; + } else if (inputList.isAbsoluteAndSchemeAuthorityNull()) { + in = new BufferedInputStream(new FileInputStream(inputList.toString())); + } else { + in = inputList.getFileSystem(conf).open(inputList); + } + try { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + String line; + while ((line = reader.readLine()) != null) { + writer.write(line + "\n"); + numFiles++; + } + reader.close(); + } finally { + in.close(); + } + } + + 
writer.close(); + } finally { + out.close(); + } + return numFiles; + } + + /** + * Add the specified file to the input set, if path is a directory then + * add the files contained therein. + */ + private long addInputFilesRecursively(Path path, Writer writer, FileSystem fs, PathFilter pathFilter) throws IOException { + long numFiles = 0; + for (FileStatus stat : fs.listStatus(path, pathFilter)) { + LOG.debug("Adding path {}", stat.getPath()); + if (stat.isDirectory()) { + numFiles += addInputFilesRecursively(stat.getPath(), writer, fs, pathFilter); + } else { + writer.write(stat.getPath().toString() + "\n"); + numFiles++; + } + } + return numFiles; + } + + private void randomizeFewInputFiles(FileSystem fs, Path outputStep2Dir, Path fullInputList) throws IOException { + List lines = new ArrayList(); + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + try { + String line; + while ((line = reader.readLine()) != null) { + lines.add(line); + } + } finally { + reader.close(); + } + + Collections.shuffle(lines, new Random(421439783L)); // constant seed for reproducability + + FSDataOutputStream out = fs.create(new Path(outputStep2Dir, FULL_INPUT_LIST)); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + try { + for (String line : lines) { + writer.write(line + "\n"); + } + } finally { + writer.close(); + } + } + + /** + * To uniformly spread load across all mappers we randomize fullInputList + * with a separate small Mapper & Reducer preprocessing step. This way + * each input line ends up on a random position in the output file list. + * Each mapper indexes a disjoint consecutive set of files such that each + * set has roughly the same size, at least from a probabilistic + * perspective. 
+ * + * For example an input file with the following input list of URLs: + * + * A + * B + * C + * D + * + * might be randomized into the following output list of URLs: + * + * C + * A + * D + * B + * + * The implementation sorts the list of lines by randomly generated numbers. + */ + private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit) + throws IOException { + + Job job2 = Job.getInstance(baseConfig); + job2.setJarByClass(getClass()); + job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class)); + job2.setInputFormatClass(NLineInputFormat.class); + NLineInputFormat.addInputPath(job2, fullInputList); + NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit); + job2.setMapperClass(LineRandomizerMapper.class); + job2.setReducerClass(LineRandomizerReducer.class); + job2.setOutputFormatClass(TextOutputFormat.class); + FileOutputFormat.setOutputPath(job2, outputStep2Dir); + job2.setNumReduceTasks(1); + job2.setOutputKeyClass(LongWritable.class); + job2.setOutputValueClass(Text.class); + return job2; + } + + // do the same as if the user had typed 'hadoop ... --files ' + private void addDistributedCacheFile(File file, Configuration conf) throws IOException { + String HADOOP_TMP_FILES = "tmpfiles"; // see Hadoop's GenericOptionsParser + String tmpFiles = conf.get(HADOOP_TMP_FILES, ""); + if (tmpFiles.length() > 0) { // already present? 
+ tmpFiles = tmpFiles + ","; + } + GenericOptionsParser parser = new GenericOptionsParser( + new Configuration(conf), + new String[] { "--files", file.getCanonicalPath() }); + String additionalTmpFiles = parser.getConfiguration().get(HADOOP_TMP_FILES); + assert additionalTmpFiles != null; + assert additionalTmpFiles.length() > 0; + tmpFiles += additionalTmpFiles; + conf.set(HADOOP_TMP_FILES, tmpFiles); + } + + private MorphlineMapRunner setupMorphline(Options options) throws IOException, URISyntaxException { + if (options.morphlineId != null) { + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_ID_PARAM, options.morphlineId); + } + addDistributedCacheFile(options.morphlineFile, job.getConfiguration()); + if (!options.isDryRun) { + return null; + } + + /* + * Ensure scripting support for Java via morphline "java" command works even in dryRun mode, + * i.e. when executed in the client side driver JVM. To do so, collect all classpath URLs from + * the class loaders chain that org.apache.hadoop.util.RunJar (hadoop jar xyz-job.jar) and + * org.apache.hadoop.util.GenericOptionsParser (--libjars) have installed, then tell + * FastJavaScriptEngine.parse() where to find classes that JavaBuilder scripts might depend on. + * This ensures that scripts that reference external java classes compile without exceptions + * like this: + * + * ... 
caused by compilation failed: mfm:///MyJavaClass1.java:2: package + * com.cloudera.cdk.morphline.api does not exist + */ + LOG.trace("dryRun: java.class.path: {}", System.getProperty("java.class.path")); + String fullClassPath = ""; + ClassLoader loader = Thread.currentThread().getContextClassLoader(); // see org.apache.hadoop.util.RunJar + while (loader != null) { // walk class loaders, collect all classpath URLs + if (loader instanceof URLClassLoader) { + URL[] classPathPartURLs = ((URLClassLoader) loader).getURLs(); // see org.apache.hadoop.util.RunJar + LOG.trace("dryRun: classPathPartURLs: {}", Arrays.asList(classPathPartURLs)); + StringBuilder classPathParts = new StringBuilder(); + for (URL url : classPathPartURLs) { + File file = new File(url.toURI()); + if (classPathPartURLs.length > 0) { + classPathParts.append(File.pathSeparator); + } + classPathParts.append(file.getPath()); + } + LOG.trace("dryRun: classPathParts: {}", classPathParts); + String separator = File.pathSeparator; + if (fullClassPath.length() == 0 || classPathParts.length() == 0) { + separator = ""; + } + fullClassPath = classPathParts + separator + fullClassPath; + } + loader = loader.getParent(); + } + + // tell FastJavaScriptEngine.parse() where to find the classes that the script might depend on + if (fullClassPath.length() > 0) { + assert System.getProperty("java.class.path") != null; + fullClassPath = System.getProperty("java.class.path") + File.pathSeparator + fullClassPath; + LOG.trace("dryRun: fullClassPath: {}", fullClassPath); + System.setProperty("java.class.path", fullClassPath); // see FastJavaScriptEngine.parse() + } + + job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getPath()); + return new MorphlineMapRunner( + job.getConfiguration(), new DryRunDocumentLoader(), options.solrHomeDir.getPath()); + } + + /* + * Executes the morphline in the current process (without submitting a job to MR) for quicker + * turnaround during trial & debug 
sessions + */ + private void dryRun(MorphlineMapRunner runner, FileSystem fs, Path fullInputList) throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + try { + String line; + while ((line = reader.readLine()) != null) { + runner.map(line, job.getConfiguration(), null); + } + runner.cleanup(); + } finally { + reader.close(); + } + } + + private int createTreeMergeInputDirList(Path outputReduceDir, FileSystem fs, Path fullInputList) + throws FileNotFoundException, IOException { + + FileStatus[] dirs = listSortedOutputShardDirs(outputReduceDir, fs); + int numFiles = 0; + FSDataOutputStream out = fs.create(fullInputList); + try { + Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + for (FileStatus stat : dirs) { + LOG.debug("Adding path {}", stat.getPath()); + Path dir = new Path(stat.getPath(), "data/index"); + if (!fs.isDirectory(dir)) { + throw new IllegalStateException("Not a directory: " + dir); + } + writer.write(dir.toString() + "\n"); + numFiles++; + } + writer.close(); + } finally { + out.close(); + } + return numFiles; + } + + private FileStatus[] listSortedOutputShardDirs(Path outputReduceDir, FileSystem fs) throws FileNotFoundException, + IOException { + + final String dirPrefix = SolrOutputFormat.getOutputName(job); + FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(dirPrefix); + } + }); + for (FileStatus dir : dirs) { + if (!dir.isDirectory()) { + throw new IllegalStateException("Not a directory: " + dir.getPath()); + } + } + Arrays.sort(dirs); // FIXME: handle more than 99999 shards (need numeric sort rather than lexicographical sort) + return dirs; + } + + private static void verifyGoLiveArgs(Options opts, ArgumentParser parser) throws ArgumentParserException { + if (opts.zkHost == null && opts.solrHomeDir == null) { + throw new ArgumentParserException("At 
least one of --zk-host or --solr-home-dir is required", parser); + } + if (opts.goLive && opts.zkHost == null && opts.shardUrls == null) { + throw new ArgumentParserException("--go-live requires that you also pass --shard-url or --zk-host", parser); + } + + if (opts.zkHost != null && opts.collection == null) { + throw new ArgumentParserException("--zk-host requires that you also pass --collection", parser); + } + + if (opts.zkHost != null) { + return; + // verify structure of ZK directory later, to avoid checking run-time errors during parsing. + } else if (opts.shardUrls != null) { + if (opts.shardUrls.size() == 0) { + throw new ArgumentParserException("--shard-url requires at least one URL", parser); + } + } else if (opts.shards != null) { + if (opts.shards <= 0) { + throw new ArgumentParserException("--shards must be a positive number: " + opts.shards, parser); + } + } else { + throw new ArgumentParserException("You must specify one of the following (mutually exclusive) arguments: " + + "--zk-host or --shard-url or --shards", parser); + } + + if (opts.shardUrls != null) { + opts.shards = opts.shardUrls.size(); + } + + assert opts.shards != null; + assert opts.shards > 0; + } + + private static void verifyZKStructure(Options opts, ArgumentParser parser) throws ArgumentParserException { + if (opts.zkHost != null) { + assert opts.collection != null; + ZooKeeperInspector zki = new ZooKeeperInspector(); + try { + opts.shardUrls = zki.extractShardUrls(opts.zkHost, opts.collection); + } catch (Exception e) { + LOG.debug("Cannot extract SolrCloud shard URLs from ZooKeeper", e); + throw new ArgumentParserException(e, parser); + } + assert opts.shardUrls != null; + if (opts.shardUrls.size() == 0) { + throw new ArgumentParserException("--zk-host requires ZooKeeper " + opts.zkHost + + " to contain at least one SolrCore for collection: " + opts.collection, parser); + } + opts.shards = opts.shardUrls.size(); + LOG.debug("Using SolrCloud shard URLs: {}", opts.shardUrls); + } + 
} + + private boolean waitForCompletion(Job job, boolean isVerbose) + throws IOException, InterruptedException, ClassNotFoundException { + + LOG.debug("Running job: " + getJobInfo(job)); + boolean success = job.waitForCompletion(isVerbose); + if (!success) { + LOG.error("Job failed! " + getJobInfo(job)); + } + return success; + } + + private void goodbye(Job job, long startTime) { + float secs = (System.currentTimeMillis() - startTime) / 1000.0f; + if (job != null) { + LOG.info("Succeeded with job: " + getJobInfo(job)); + } + LOG.info("Success. Done. Program took {} secs. Goodbye.", secs); + } + + private String getJobInfo(Job job) { + return "jobName: " + job.getJobName() + ", jobId: " + job.getJobID(); + } + + private boolean rename(Path src, Path dst, FileSystem fs) throws IOException { + boolean success = fs.rename(src, dst); + if (!success) { + LOG.error("Cannot rename " + src + " to " + dst); + } + return success; + } + + private boolean delete(Path path, boolean recursive, FileSystem fs) throws IOException { + boolean success = fs.delete(path, recursive); + if (!success) { + LOG.error("Cannot delete " + path); + } + return success; + } + + // same as IntMath.divide(p, q, RoundingMode.CEILING) + private long ceilDivide(long p, long q) { + long result = p / q; + if (p % q != 0) { + result++; + } + return result; + } + + /** + * Returns logbasevalue. + */ + private double log(double base, double value) { + return Math.log(value) / Math.log(base); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java new file mode 100644 index 00000000000..770a2f9f90b --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathArgumentType.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import net.sourceforge.argparse4j.inf.Argument; +import net.sourceforge.argparse4j.inf.ArgumentParser; +import net.sourceforge.argparse4j.inf.ArgumentParserException; +import net.sourceforge.argparse4j.inf.ArgumentType; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; + +/** + * ArgumentType subclass for HDFS Path type, using fluent style API. 
+ */ +public class PathArgumentType implements ArgumentType { + + private final Configuration conf; + private FileSystem fs; + private boolean acceptSystemIn = false; + private boolean verifyExists = false; + private boolean verifyNotExists = false; + private boolean verifyIsFile = false; + private boolean verifyIsDirectory = false; + private boolean verifyCanRead = false; + private boolean verifyCanWrite = false; + private boolean verifyCanWriteParent = false; + private boolean verifyCanExecute = false; + private boolean verifyIsAbsolute = false; + private boolean verifyHasScheme = false; + private String verifyScheme = null; + + public PathArgumentType(Configuration conf) { + this.conf = conf; + } + + public PathArgumentType acceptSystemIn() { + acceptSystemIn = true; + return this; + } + + public PathArgumentType verifyExists() { + verifyExists = true; + return this; + } + + public PathArgumentType verifyNotExists() { + verifyNotExists = true; + return this; + } + + public PathArgumentType verifyIsFile() { + verifyIsFile = true; + return this; + } + + public PathArgumentType verifyIsDirectory() { + verifyIsDirectory = true; + return this; + } + + public PathArgumentType verifyCanRead() { + verifyCanRead = true; + return this; + } + + public PathArgumentType verifyCanWrite() { + verifyCanWrite = true; + return this; + } + + public PathArgumentType verifyCanWriteParent() { + verifyCanWriteParent = true; + return this; + } + + public PathArgumentType verifyCanExecute() { + verifyCanExecute = true; + return this; + } + + public PathArgumentType verifyIsAbsolute() { + verifyIsAbsolute = true; + return this; + } + + public PathArgumentType verifyHasScheme() { + verifyHasScheme = true; + return this; + } + + public PathArgumentType verifyScheme(String scheme) { + verifyScheme = scheme; + return this; + } + + @Override + public Path convert(ArgumentParser parser, Argument arg, String value) throws ArgumentParserException { + Path file = new Path(value); + try { + fs = 
file.getFileSystem(conf); + if (verifyHasScheme && !isSystemIn(file)) { + verifyHasScheme(parser, file); + } + if (verifyScheme != null && !isSystemIn(file)) { + verifyScheme(parser, file); + } + if (verifyIsAbsolute && !isSystemIn(file)) { + verifyIsAbsolute(parser, file); + } + if (verifyExists && !isSystemIn(file)) { + verifyExists(parser, file); + } + if (verifyNotExists && !isSystemIn(file)) { + verifyNotExists(parser, file); + } + if (verifyIsFile && !isSystemIn(file)) { + verifyIsFile(parser, file); + } + if (verifyIsDirectory && !isSystemIn(file)) { + verifyIsDirectory(parser, file); + } + if (verifyCanRead && !isSystemIn(file)) { + verifyCanRead(parser, file); + } + if (verifyCanWrite && !isSystemIn(file)) { + verifyCanWrite(parser, file); + } + if (verifyCanWriteParent && !isSystemIn(file)) { + verifyCanWriteParent(parser, file); + } + if (verifyCanExecute && !isSystemIn(file)) { + verifyCanExecute(parser, file); + } + } catch (IOException e) { + throw new ArgumentParserException(e, parser); + } + return file; + } + + private void verifyExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.exists(file)) { + throw new ArgumentParserException("File not found: " + file, parser); + } + } + + private void verifyNotExists(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (fs.exists(file)) { + throw new ArgumentParserException("File found: " + file, parser); + } + } + + private void verifyIsFile(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.isFile(file)) { + throw new ArgumentParserException("Not a file: " + file, parser); + } + } + + private void verifyIsDirectory(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + if (!fs.isDirectory(file)) { + throw new ArgumentParserException("Not a directory: " + file, parser); + } + } + + private void verifyCanRead(ArgumentParser parser, Path file) throws 
ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.READ)) { + throw new ArgumentParserException("Insufficient permissions to read file: " + file, parser); + } + } + + private void verifyCanWrite(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.WRITE)) { + throw new ArgumentParserException("Insufficient permissions to write file: " + file, parser); + } + } + + private void verifyCanWriteParent(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + Path parent = file.getParent(); + if (parent == null || !fs.exists(parent) || !fs.getFileStatus(parent).getPermission().getUserAction().implies(FsAction.WRITE)) { + throw new ArgumentParserException("Cannot write parent of file: " + file, parser); + } + } + + private void verifyCanExecute(ArgumentParser parser, Path file) throws ArgumentParserException, IOException { + verifyExists(parser, file); + if (!fs.getFileStatus(file).getPermission().getUserAction().implies(FsAction.EXECUTE)) { + throw new ArgumentParserException("Insufficient permissions to execute file: " + file, parser); + } + } + + private void verifyIsAbsolute(ArgumentParser parser, Path file) throws ArgumentParserException { + if (!file.isAbsolute()) { + throw new ArgumentParserException("Not an absolute file: " + file, parser); + } + } + + private void verifyHasScheme(ArgumentParser parser, Path file) throws ArgumentParserException { + if (file.toUri().getScheme() == null) { + throw new ArgumentParserException("URI scheme is missing in path: " + file, parser); + } + } + + private void verifyScheme(ArgumentParser parser, Path file) throws ArgumentParserException { + if (!verifyScheme.equals(file.toUri().getScheme())) { + throw new ArgumentParserException("Scheme of path: " + file + " must be: " + 
verifyScheme, parser); + } + } + + private boolean isSystemIn(Path file) { + return acceptSystemIn && file.toString().equals("-"); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java new file mode 100644 index 00000000000..690901b4c76 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/PathParts.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.server.namenode.NameNode; + +/** + * Extracts various components of an HDFS Path + */ +public final class PathParts { + + private final String uploadURL; + private final Configuration conf; + private final FileSystem fs; + private final Path normalizedPath; + private FileStatus stats; + + public PathParts(String uploadURL, Configuration conf) throws IOException { + if (uploadURL == null) { + throw new IllegalArgumentException("Path must not be null: " + uploadURL); + } + this.uploadURL = uploadURL; + if (conf == null) { + throw new IllegalArgumentException("Configuration must not be null: " + uploadURL); + } + this.conf = conf; + URI uri = stringToUri(uploadURL); + this.fs = FileSystem.get(uri, conf); + if (fs == null) { + throw new IllegalArgumentException("File system must not be null: " + uploadURL); + } + this.normalizedPath = fs.makeQualified(new Path(uri)); + if (!normalizedPath.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + uploadURL); + } + if (getScheme() == null) { + throw new IllegalArgumentException("Scheme must not be null: " + uploadURL); + } + if (getHost() == null) { + throw new IllegalArgumentException("Host must not be null: " + uploadURL); + } + if (getPort() < 0) { + throw new IllegalArgumentException("Port must not be negative: " + uploadURL); + } + } + + public String getUploadURL() { + return uploadURL; + } + + public Path getUploadPath() { + return new Path(getUploadURL()); + } + + public String getURIPath() { + return normalizedPath.toUri().getPath(); + } + + public String getName() { + return normalizedPath.getName(); + } + + public String getScheme() { + return normalizedPath.toUri().getScheme(); + } + + public String 
getHost() { + return normalizedPath.toUri().getHost(); + } + + public int getPort() { + int port = normalizedPath.toUri().getPort(); + if (port == -1) { + port = fs.getWorkingDirectory().toUri().getPort(); + if (port == -1) { + port = NameNode.DEFAULT_PORT; + } + } + return port; + } + + public String getId() { + return getScheme() + "://" + getHost() + ":" + getPort() + getURIPath(); + } + + public String getDownloadURL() { + return getId(); + } + + public Configuration getConfiguration() { + return conf; + } + + public FileSystem getFileSystem() { + return fs; + } + + public FileStatus getFileStatus() throws IOException { + if (stats == null) { + stats = getFileSystem().getFileStatus(getUploadPath()); + } + return stats; + } + + private URI stringToUri(String pathString) { + //return new Path(pathString).toUri().normalize(); + return URI.create(pathString).normalize(); + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java new file mode 100644 index 00000000000..27f532c174a --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCloudPartitioner.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.DocRouter; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.Hash; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MapReduce partitioner that partitions the Mapper output such that each + * SolrInputDocument gets sent to the SolrCloud shard that it would have been + * sent to if the document were ingested via the standard SolrCloud Near Real + * Time (NRT) API. + * + * In other words, this class implements the same partitioning semantics as the + * standard SolrCloud NRT API. This enables to mix batch updates from MapReduce + * ingestion with updates from standard NRT ingestion on the same SolrCloud + * cluster, using identical unique document keys. 
+ */ +public class SolrCloudPartitioner extends Partitioner implements Configurable { + + private Configuration conf; + private DocCollection docCollection; + private Map shardNumbers; + private int shards = 0; + private final SolrParams emptySolrParams = new MapSolrParams(Collections.EMPTY_MAP); + + public static final String SHARDS = SolrCloudPartitioner.class.getName() + ".shards"; + public static final String ZKHOST = SolrCloudPartitioner.class.getName() + ".zkHost"; + public static final String COLLECTION = SolrCloudPartitioner.class.getName() + ".collection"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrCloudPartitioner.class); + + public SolrCloudPartitioner() {} + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + this.shards = conf.getInt(SHARDS, -1); + if (shards <= 0) { + throw new IllegalArgumentException("Illegal shards: " + shards); + } + String zkHost = conf.get(ZKHOST); + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + String collection = conf.get(COLLECTION); + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + LOG.info("Using SolrCloud zkHost: {}, collection: {}", zkHost, collection); + docCollection = new ZooKeeperInspector().extractDocCollection(zkHost, collection); + if (docCollection == null) { + throw new IllegalArgumentException("docCollection must not be null"); + } + if (docCollection.getSlicesMap().size() != shards) { + throw new IllegalArgumentException("Incompatible shards: + " + shards + " for docCollection: " + docCollection); + } + List slices = new ZooKeeperInspector().getSortedSlices(docCollection.getSlices()); + if (slices.size() != shards) { + throw new IllegalStateException("Incompatible sorted shards: + " + shards + " for docCollection: " + docCollection); + } + shardNumbers = new HashMap(10 * slices.size()); // sparse for performance + for (int i = 0; i < slices.size(); i++) { + 
shardNumbers.put(slices.get(i).getName(), i); + } + LOG.debug("Using SolrCloud docCollection: {}", docCollection); + DocRouter docRouter = docCollection.getRouter(); + if (docRouter == null) { + throw new IllegalArgumentException("docRouter must not be null"); + } + LOG.info("Using SolrCloud docRouterClass: {}", docRouter.getClass()); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public int getPartition(Text key, SolrInputDocumentWritable value, int numPartitions) { + DocRouter docRouter = docCollection.getRouter(); + SolrInputDocument doc = value.getSolrInputDocument(); + String keyStr = key.toString(); + + // TODO: scalability: replace linear search in HashBasedRouter.hashToSlice() with binary search on sorted hash ranges + Slice slice = docRouter.getTargetSlice(keyStr, doc, emptySolrParams, docCollection); + +// LOG.info("slice: {}", slice); + if (slice == null) { + throw new IllegalStateException("No matching slice found! The slice seems unavailable. 
docRouterClass: " + + docRouter.getClass().getName()); + } + int rootShard = shardNumbers.get(slice.getName()); + if (rootShard < 0 || rootShard >= shards) { + throw new IllegalStateException("Illegal shard number " + rootShard + " for slice: " + slice + ", docCollection: " + + docCollection); + } + + // map doc to micro shard aka leaf shard, akin to HashBasedRouter.sliceHash() + // taking into account mtree merge algorithm + assert numPartitions % shards == 0; // Also note that numPartitions is equal to the number of reducers + int hashCode = Hash.murmurhash3_x86_32(keyStr, 0, keyStr.length(), 0); + int offset = (hashCode & Integer.MAX_VALUE) % (numPartitions / shards); + int microShard = (rootShard * (numPartitions / shards)) + offset; +// LOG.info("Subpartitions rootShard: {}, offset: {}", rootShard, offset); +// LOG.info("Partitioned to p: {} for numPartitions: {}, shards: {}, key: {}, value: {}", microShard, numPartitions, shards, key, value); + + assert microShard >= 0 && microShard < numPartitions; + return microShard; + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java new file mode 100644 index 00000000000..88e9acb57cc --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrCounters.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +public enum SolrCounters { + + DOCUMENTS_WRITTEN (getClassName(SolrReducer.class) + + ": Number of documents processed"), + + BATCHES_WRITTEN (getClassName(SolrReducer.class) + + ": Number of document batches processed"), + + BATCH_WRITE_TIME (getClassName(SolrReducer.class) + + ": Time spent by reducers writing batches [ms]"), + + PHYSICAL_REDUCER_MERGE_TIME (getClassName(SolrReducer.class) + + ": Time spent by reducers on physical merges [ms]"), + + LOGICAL_TREE_MERGE_TIME (getClassName(TreeMergeMapper.class) + + ": Time spent on logical tree merges [ms]"), + + PHYSICAL_TREE_MERGE_TIME (getClassName(TreeMergeMapper.class) + + ": Time spent on physical tree merges [ms]"); + + private final String label; + + private SolrCounters(String label) { + this.label = label; + } + + public String toString() { + return label; + } + + private static String getClassName(Class clazz) { + return Utils.getShortClassName(clazz); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java new file mode 100644 index 00000000000..e043f7a0ed2 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrInputDocumentWritable.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.FastOutputStream; +import org.apache.solr.common.util.JavaBinCodec; + +public class SolrInputDocumentWritable implements Writable { + private SolrInputDocument sid; + + public SolrInputDocumentWritable() { + } + + public SolrInputDocumentWritable(SolrInputDocument sid) { + this.sid = sid; + } + + public SolrInputDocument getSolrInputDocument() { + return sid; + } + + @Override + public String toString() { + return sid.toString(); + } + + @Override + public void write(DataOutput out) throws IOException { + JavaBinCodec codec = new JavaBinCodec(); + FastOutputStream daos = FastOutputStream.wrap(DataOutputOutputStream.constructOutputStream(out)); + codec.init(daos); + try { + codec.writeVal(sid); + } finally { + daos.flushBuffer(); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + JavaBinCodec codec = new JavaBinCodec(); + UnbufferedDataInputInputStream dis = new UnbufferedDataInputInputStream(in); + sid = (SolrInputDocument)codec.readVal(dis); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java new file 
mode 100644 index 00000000000..2a6d699b541 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrMapper.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; + +public class SolrMapper extends Mapper { + + private Path solrHomeDir; + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + Utils.getLogConfigFile(context.getConfiguration()); + super.setup(context); + solrHomeDir = SolrRecordWriter.findSolrConfig(context.getConfiguration()); + } + + protected Path getSolrHomeDir() { + return solrHomeDir; + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java new file mode 100644 index 00000000000..97b2b79404e --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrOutputFormat.java @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; +import java.util.UUID; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SolrOutputFormat extends FileOutputFormat { + + private static final Logger LOG = LoggerFactory.getLogger(SolrOutputFormat.class); + + /** + * The parameter used to pass the solr config zip file information. 
This will + * be the hdfs path to the configuration zip file + */ + public static final String SETUP_OK = "solr.output.format.setup"; + + /** The key used to pass the zip file name through the configuration. */ + public static final String ZIP_NAME = "solr.zip.name"; + + /** + * The base name of the zip file containing the configuration information. + * This file is passed via the distributed cache using a unique name, obtained + * via {@link #getZipName(Configuration jobConf)}. + */ + public static final String ZIP_FILE_BASE_NAME = "solr.zip"; + + /** + * The key used to pass the boolean configuration parameter that instructs for + * regular or zip file output + */ + public static final String OUTPUT_ZIP_FILE = "solr.output.zip.format"; + + static int defaultSolrWriterThreadCount = 0; + + public static final String SOLR_WRITER_THREAD_COUNT = "solr.record.writer.num.threads"; + + static int defaultSolrWriterQueueSize = 1; + + public static final String SOLR_WRITER_QUEUE_SIZE = "solr.record.writer.max.queues.size"; + + static int defaultSolrBatchSize = 20; + + public static final String SOLR_RECORD_WRITER_BATCH_SIZE = "solr.record.writer.batch.size"; + + public static final String SOLR_RECORD_WRITER_MAX_SEGMENTS = "solr.record.writer.maxSegments"; + + public static String getSetupOk() { + return SETUP_OK; + } + + /** Get the number of threads used for index writing */ + public static void setSolrWriterThreadCount(int count, Configuration conf) { + conf.setInt(SOLR_WRITER_THREAD_COUNT, count); + } + + /** Set the number of threads used for index writing */ + public static int getSolrWriterThreadCount(Configuration conf) { + return conf.getInt(SOLR_WRITER_THREAD_COUNT, defaultSolrWriterThreadCount); + } + + /** + * Set the maximum size of the the queue for documents to be written to the + * index. 
+ */ + public static void setSolrWriterQueueSize(int count, Configuration conf) { + conf.setInt(SOLR_WRITER_QUEUE_SIZE, count); + } + + /** Return the maximum size for the number of documents pending index writing. */ + public static int getSolrWriterQueueSize(Configuration conf) { + return conf.getInt(SOLR_WRITER_QUEUE_SIZE, defaultSolrWriterQueueSize); + } + + /** + * Return the file name portion of the configuration zip file, from the + * configuration. + */ + public static String getZipName(Configuration conf) { + return conf.get(ZIP_NAME, ZIP_FILE_BASE_NAME); + } + + /** + * configure the job to output zip files of the output index, or full + * directory trees. Zip files are about 1/5th the size of the raw index, and + * much faster to write, but take more cpu to create. + * + * @param output true if should output zip files + * @param conf to use + */ + public static void setOutputZipFormat(boolean output, Configuration conf) { + conf.setBoolean(OUTPUT_ZIP_FILE, output); + } + + /** + * return true if the output should be a zip file of the index, rather than + * the raw index + * + * @param conf to use + * @return true if output zip files is on + */ + public static boolean isOutputZipFormat(Configuration conf) { + return conf.getBoolean(OUTPUT_ZIP_FILE, false); + } + + public static String getOutputName(JobContext job) { + return FileOutputFormat.getOutputName(job); + } + + @Override + public void checkOutputSpecs(JobContext job) throws IOException { + super.checkOutputSpecs(job); + if (job.getConfiguration().get(SETUP_OK) == null) { + throw new IOException("Solr home cache not set up!"); + } + } + + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + Utils.getLogConfigFile(context.getConfiguration()); + Path workDir = getDefaultWorkFile(context, ""); + int batchSize = getBatchSize(context.getConfiguration()); + return new SolrRecordWriter(context, workDir, batchSize); + } + + public static 
void setupSolrHomeCache(File solrHomeDir, Job job) throws IOException{ + File solrHomeZip = createSolrHomeZip(solrHomeDir); + addSolrConfToDistributedCache(job, solrHomeZip); + } + + public static File createSolrHomeZip(File solrHomeDir) throws IOException { + return createSolrHomeZip(solrHomeDir, false); + } + + private static File createSolrHomeZip(File solrHomeDir, boolean safeToModify) throws IOException { + if (solrHomeDir == null || !(solrHomeDir.exists() && solrHomeDir.isDirectory())) { + throw new IOException("Invalid solr home: " + solrHomeDir); + } + File solrHomeZip = File.createTempFile("solr", ".zip"); + createZip(solrHomeDir, solrHomeZip); + return solrHomeZip; + } + + public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) + throws IOException { + // Make a reasonably unique name for the zip file in the distributed cache + // to avoid collisions if multiple jobs are running. + String hdfsZipName = UUID.randomUUID().toString() + '.' + + ZIP_FILE_BASE_NAME; + Configuration jobConf = job.getConfiguration(); + jobConf.set(ZIP_NAME, hdfsZipName); + + Path zipPath = new Path("/tmp", getZipName(jobConf)); + FileSystem fs = FileSystem.get(jobConf); + fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath); + final URI baseZipUrl = fs.getUri().resolve( + zipPath.toString() + '#' + getZipName(jobConf)); + + DistributedCache.addCacheArchive(baseZipUrl, jobConf); + LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives())); + LOG.debug("Set zipPath: {}", zipPath); + // Actually send the path for the configuration zip file + jobConf.set(SETUP_OK, zipPath.toString()); + } + + private static void createZip(File dir, File out) throws IOException { + HashSet files = new HashSet(); + // take only conf/ and lib/ + for (String allowedDirectory : SolrRecordWriter + .getAllowedConfigDirectories()) { + File configDir = new File(dir, allowedDirectory); + boolean configDirExists; + /** If the directory does not exist, and 
is required, bail out */ + if (!(configDirExists = configDir.exists()) + && SolrRecordWriter.isRequiredConfigDirectory(allowedDirectory)) { + throw new IOException(String.format(Locale.ENGLISH, + "required configuration directory %s is not present in %s", + allowedDirectory, dir)); + } + if (!configDirExists) { + continue; + } + listFiles(configDir, files); // Store the files in the existing, allowed + // directory configDir, in the list of files + // to store in the zip file + } + + out.delete(); + int subst = dir.toString().length(); + ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(out)); + byte[] buf = new byte[1024]; + for (File f : files) { + ZipEntry ze = new ZipEntry(f.toString().substring(subst)); + zos.putNextEntry(ze); + InputStream is = new FileInputStream(f); + int cnt; + while ((cnt = is.read(buf)) >= 0) { + zos.write(buf, 0, cnt); + } + is.close(); + zos.flush(); + zos.closeEntry(); + } + + ZipEntry ze = new ZipEntry("solr.xml"); + zos.putNextEntry(ze); + zos.write("".getBytes("UTF-8")); + zos.flush(); + zos.closeEntry(); + zos.close(); + } + + private static void listFiles(File dir, Set files) throws IOException { + File[] list = dir.listFiles(); + + if (list == null && dir.isFile()) { + files.add(dir); + return; + } + + for (File f : list) { + if (f.isFile()) { + files.add(f); + } else { + listFiles(f, files); + } + } + } + + public static int getBatchSize(Configuration jobConf) { + // TODO Auto-generated method stub + return jobConf.getInt(SolrOutputFormat.SOLR_RECORD_WRITER_BATCH_SIZE, + defaultSolrBatchSize); + } + + public static void setBatchSize(int count, Configuration jobConf) { + jobConf.setInt(SOLR_RECORD_WRITER_BATCH_SIZE, count); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java new file mode 100644 index 00000000000..e589c36313f --- /dev/null +++ 
b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrRecordWriter.java @@ -0,0 +1,516 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Properties; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.solr.hadoop.SolrOutputFormat; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrCore; +import 
org.apache.solr.core.SolrResourceLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class SolrRecordWriter extends RecordWriter { + + private static final Logger LOG = LoggerFactory.getLogger(SolrRecordWriter.class); + + public final static List allowedConfigDirectories = new ArrayList( + Arrays.asList(new String[] { "conf", "lib", "solr.xml" })); + + public final static Set requiredConfigDirectories = new HashSet(); + + static { + requiredConfigDirectories.add("conf"); + } + + /** + * Return the list of directories names that may be included in the + * configuration data passed to the tasks. + * + * @return an UnmodifiableList of directory names + */ + public static List getAllowedConfigDirectories() { + return Collections.unmodifiableList(allowedConfigDirectories); + } + + /** + * check if the passed in directory is required to be present in the + * configuration data set. + * + * @param directory The directory to check + * @return true if the directory is required. + */ + public static boolean isRequiredConfigDirectory(final String directory) { + return requiredConfigDirectories.contains(directory); + } + + /** The path that the final index will be written to */ + + /** The location in a local temporary directory that the index is built in. */ + +// /** +// * If true, create a zip file of the completed index in the final storage +// * location A .zip will be appended to the final output name if it is not +// * already present. 
+// */ +// private boolean outputZipFile = false; + + private final HeartBeater heartBeater; + private final BatchWriter batchWriter; + private final List batch; + private final int batchSize; + private long numDocsWritten = 0; + private long nextLogTime = System.currentTimeMillis(); + + private static HashMap.Context> contextMap = new HashMap.Context>(); + + public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) { + this.batchSize = batchSize; + this.batch = new ArrayList(batchSize); + Configuration conf = context.getConfiguration(); + + // setLogLevel("org.apache.solr.core", "WARN"); + // setLogLevel("org.apache.solr.update", "WARN"); + + heartBeater = new HeartBeater(context); + try { + heartBeater.needHeartBeat(); + + Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf); + FileSystem fs = outputShardDir.getFileSystem(conf); + EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir); + batchWriter = new BatchWriter(solr, batchSize, + context.getTaskAttemptID().getTaskID(), + SolrOutputFormat.getSolrWriterThreadCount(conf), + SolrOutputFormat.getSolrWriterQueueSize(conf)); + + } catch (Exception e) { + throw new IllegalStateException(String.format(Locale.ENGLISH, + "Failed to initialize record writer for %s, %s", context.getJobName(), conf + .get("mapred.task.id")), e); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + public static EmbeddedSolrServer createEmbeddedSolrServer(Path solrHomeDir, FileSystem fs, Path outputShardDir) + throws IOException { + + if (solrHomeDir == null) { + throw new IOException("Unable to find solr home setting"); + } + LOG.info("Creating embedded Solr server with solrHomeDir: " + solrHomeDir + ", fs: " + fs + ", outputShardDir: " + outputShardDir); + + Properties props = new Properties(); + // FIXME note this is odd (no scheme) given Solr doesn't currently + // support uris (just abs/relative path) + Path solrDataDir = new Path(outputShardDir, "data"); + if 
(!fs.exists(solrDataDir) && !fs.mkdirs(solrDataDir)) { + throw new IOException("Unable to create " + solrDataDir); + } + + String dataDirStr = solrDataDir.toUri().toString(); + props.setProperty("solr.data.dir", dataDirStr); + props.setProperty("solr.home", solrHomeDir.toString()); + + SolrResourceLoader loader = new SolrResourceLoader(solrHomeDir.toString(), + null, props); + + LOG.info(String + .format(Locale.ENGLISH, + "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to solr.data.dir %s, with permdir %s", + solrHomeDir, solrHomeDir.toUri(), loader.getInstanceDir(), + loader.getConfigDir(), dataDirStr, outputShardDir)); + + CoreContainer container = new CoreContainer(loader); + container.load(); + CoreDescriptor descr = new CoreDescriptor(container, "core1", + ".", props); + + SolrCore core = container.create(descr); + container.register(core, false); + + System.setProperty("solr.hdfs.nrtcachingdirectory", "false"); + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + System.setProperty("solr.autoCommit.maxTime", "-1"); + System.setProperty("solr.autoSoftCommit.maxTime", "-1"); + EmbeddedSolrServer solr = new EmbeddedSolrServer(container, "core1"); + return solr; + } + + public static void incrementCounter(TaskID taskId, String groupName, String counterName, long incr) { + Reducer.Context context = contextMap.get(taskId); + if (context != null) { + context.getCounter(groupName, counterName).increment(incr); + } + } + + public static void incrementCounter(TaskID taskId, Enum counterName, long incr) { + Reducer.Context context = contextMap.get(taskId); + if (context != null) { + context.getCounter(counterName).increment(incr); + } + } + + public static void addReducerContext(Reducer.Context context) { + TaskID taskID = context.getTaskAttemptID().getTaskID(); + contextMap.put(taskID, context); + } + + public static Path findSolrConfig(Configuration conf) throws IOException { + Path solrHome = null; + // 
FIXME when mrunit supports the new cache apis + //URI[] localArchives = context.getCacheArchives(); + Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); + if (localArchives.length == 0) { + throw new IOException(String.format(Locale.ENGLISH, + "No local cache archives, where is %s:%s", SolrOutputFormat + .getSetupOk(), SolrOutputFormat.getZipName(conf))); + } + for (Path unpackedDir : localArchives) { + // Only logged if debugging + if (LOG.isDebugEnabled()) { + LOG.debug(String.format(Locale.ENGLISH, "Examining unpack directory %s for %s", + unpackedDir, SolrOutputFormat.getZipName(conf))); + + ProcessBuilder lsCmd = new ProcessBuilder(new String[] { "/bin/ls", + "-lR", unpackedDir.toString() }); + lsCmd.redirectErrorStream(); + Process ls = lsCmd.start(); + byte[] buf = new byte[16 * 1024]; + InputStream all = ls.getInputStream(); + try { + int count; + while ((count = all.read(buf)) >= 0) { + System.err.write(buf, 0, count); + } + } catch (IOException ignore) { + } finally { + all.close(); + } + String exitValue; + try { + exitValue = String.valueOf(ls.waitFor()); + } catch (InterruptedException e) { + exitValue = "interrupted"; + } + System.err.format("Exit value of 'ls -lR' is %s%n", exitValue); + } + if (unpackedDir.getName().equals(SolrOutputFormat.getZipName(conf))) { + LOG.info("Using this unpacked directory as solr home: {}", unpackedDir); + solrHome = unpackedDir; + break; + } + } + + return solrHome; + } + + /** + * Write a record. This method accumulates records in to a batch, and when + * {@link #batchSize} items are present flushes it to the indexer. The writes + * can take a substantial amount of time, depending on {@link #batchSize}. If + * there is heavy disk contention the writes may take more than the 600 second + * default timeout. 
+ */ + @Override + public void write(K key, V value) throws IOException { + heartBeater.needHeartBeat(); + try { + try { + SolrInputDocumentWritable sidw = (SolrInputDocumentWritable) value; + batch.add(sidw.getSolrInputDocument()); + if (batch.size() >= batchSize) { + batchWriter.queueBatch(batch); + numDocsWritten += batch.size(); + if (System.currentTimeMillis() >= nextLogTime) { + LOG.info("docsWritten: {}", numDocsWritten); + nextLogTime += 10000; + } + batch.clear(); + } + } catch (SolrServerException e) { + throw new IOException(e); + } + } finally { + heartBeater.cancelHeartBeat(); + } + + } + + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + if (context != null) { + heartBeater.setProgress(context); + } + try { + heartBeater.needHeartBeat(); + if (batch.size() > 0) { + batchWriter.queueBatch(batch); + numDocsWritten += batch.size(); + batch.clear(); + } + LOG.info("docsWritten: {}", numDocsWritten); + batchWriter.close(context); +// if (outputZipFile) { +// context.setStatus("Writing Zip"); +// packZipFile(); // Written to the perm location +// } else { +// context.setStatus("Copying Index"); +// fs.completeLocalOutput(perm, temp); // copy to dfs +// } + } catch (Exception e) { + if (e instanceof IOException) { + throw (IOException) e; + } + throw new IOException(e); + } finally { + heartBeater.cancelHeartBeat(); + heartBeater.close(); +// File tempFile = new File(temp.toString()); +// if (tempFile.exists()) { +// FileUtils.forceDelete(new File(temp.toString())); +// } + } + + context.setStatus("Done"); + } + +// private void packZipFile() throws IOException { +// FSDataOutputStream out = null; +// ZipOutputStream zos = null; +// int zipCount = 0; +// LOG.info("Packing zip file for " + perm); +// try { +// out = fs.create(perm, false); +// zos = new ZipOutputStream(out); +// +// String name = perm.getName().replaceAll(".zip$", ""); +// LOG.info("adding index directory" + temp); +// zipCount = 
zipDirectory(conf, zos, name, temp.toString(), temp); +// /** +// for (String configDir : allowedConfigDirectories) { +// if (!isRequiredConfigDirectory(configDir)) { +// continue; +// } +// final Path confPath = new Path(solrHome, configDir); +// LOG.info("adding configdirectory" + confPath); +// +// zipCount += zipDirectory(conf, zos, name, solrHome.toString(), confPath); +// } +// **/ +// } catch (Throwable ohFoo) { +// LOG.error("packZipFile exception", ohFoo); +// if (ohFoo instanceof RuntimeException) { +// throw (RuntimeException) ohFoo; +// } +// if (ohFoo instanceof IOException) { +// throw (IOException) ohFoo; +// } +// throw new IOException(ohFoo); +// +// } finally { +// if (zos != null) { +// if (zipCount == 0) { // If no entries were written, only close out, as +// // the zip will throw an error +// LOG.error("No entries written to zip file " + perm); +// fs.delete(perm, false); +// // out.close(); +// } else { +// LOG.info(String.format("Wrote %d items to %s for %s", zipCount, perm, +// temp)); +// zos.close(); +// } +// } +// } +// } +// +// /** +// * Write a file to a zip output stream, removing leading path name components +// * from the actual file name when creating the zip file entry. +// * +// * The entry placed in the zip file is baseName/ +// * relativePath, where relativePath is constructed +// * by removing a leading root from the path for +// * itemToZip. +// * +// * If itemToZip is an empty directory, it is ignored. If +// * itemToZip is a directory, the contents of the directory are +// * added recursively. 
+// * +// * @param zos The zip output stream +// * @param baseName The base name to use for the file name entry in the zip +// * file +// * @param root The path to remove from itemToZip to make a +// * relative path name +// * @param itemToZip The path to the file to be added to the zip file +// * @return the number of entries added +// * @throws IOException +// */ +// static public int zipDirectory(final Configuration conf, +// final ZipOutputStream zos, final String baseName, final String root, +// final Path itemToZip) throws IOException { +// LOG +// .info(String +// .format("zipDirectory: %s %s %s", baseName, root, itemToZip)); +// LocalFileSystem localFs = FileSystem.getLocal(conf); +// int count = 0; +// +// final FileStatus itemStatus = localFs.getFileStatus(itemToZip); +// if (itemStatus.isDirectory()) { +// final FileStatus[] statai = localFs.listStatus(itemToZip); +// +// // Add a directory entry to the zip file +// final String zipDirName = relativePathForZipEntry(itemToZip.toUri() +// .getPath(), baseName, root); +// final ZipEntry dirZipEntry = new ZipEntry(zipDirName +// + Path.SEPARATOR_CHAR); +// LOG.info(String.format("Adding directory %s to zip", zipDirName)); +// zos.putNextEntry(dirZipEntry); +// zos.closeEntry(); +// count++; +// +// if (statai == null || statai.length == 0) { +// LOG.info(String.format("Skipping empty directory %s", itemToZip)); +// return count; +// } +// for (FileStatus status : statai) { +// count += zipDirectory(conf, zos, baseName, root, status.getPath()); +// } +// LOG.info(String.format("Wrote %d entries for directory %s", count, +// itemToZip)); +// return count; +// } +// +// final String inZipPath = relativePathForZipEntry(itemToZip.toUri() +// .getPath(), baseName, root); +// +// if (inZipPath.length() == 0) { +// LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", +// itemToZip, root, baseName)); +// return 0; +// } +// +// // Take empty files in case the place holder is needed +// 
FSDataInputStream in = null; +// try { +// in = localFs.open(itemToZip); +// final ZipEntry ze = new ZipEntry(inZipPath); +// ze.setTime(itemStatus.getModificationTime()); +// // Comments confuse looking at the zip file +// // ze.setComment(itemToZip.toString()); +// zos.putNextEntry(ze); +// +// IOUtils.copyBytes(in, zos, conf, false); +// zos.closeEntry(); +// LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip)); +// return 1; +// } finally { +// in.close(); +// } +// +// } +// +// static String relativePathForZipEntry(final String rawPath, +// final String baseName, final String root) { +// String relativePath = rawPath.replaceFirst(Pattern.quote(root.toString()), +// ""); +// LOG.info(String.format("RawPath %s, baseName %s, root %s, first %s", +// rawPath, baseName, root, relativePath)); +// +// if (relativePath.startsWith(Path.SEPARATOR)) { +// relativePath = relativePath.substring(1); +// } +// LOG.info(String.format( +// "RawPath %s, baseName %s, root %s, post leading slash %s", rawPath, +// baseName, root, relativePath)); +// if (relativePath.isEmpty()) { +// LOG.warn(String.format( +// "No data after root (%s) removal from raw path %s", root, rawPath)); +// return baseName; +// } +// // Construct the path that will be written to the zip file, including +// // removing any leading '/' characters +// String inZipPath = baseName + Path.SEPARATOR_CHAR + relativePath; +// +// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 1 %s", +// rawPath, baseName, root, inZipPath)); +// if (inZipPath.startsWith(Path.SEPARATOR)) { +// inZipPath = inZipPath.substring(1); +// } +// LOG.info(String.format("RawPath %s, baseName %s, root %s, inZip 2 %s", +// rawPath, baseName, root, inZipPath)); +// +// return inZipPath; +// +// } +// + /* + static boolean setLogLevel(String packageName, String level) { + Log logger = LogFactory.getLog(packageName); + if (logger == null) { + return false; + } + // look for: 
org.apache.commons.logging.impl.SLF4JLocationAwareLog + LOG.warn("logger class:"+logger.getClass().getName()); + if (logger instanceof Log4JLogger) { + process(((Log4JLogger) logger).getLogger(), level); + return true; + } + if (logger instanceof Jdk14Logger) { + process(((Jdk14Logger) logger).getLogger(), level); + return true; + } + return false; + } + + public static void process(org.apache.log4j.Logger log, String level) { + if (level != null) { + log.setLevel(org.apache.log4j.Level.toLevel(level)); + } + } + + public static void process(java.util.logging.Logger log, String level) { + if (level != null) { + log.setLevel(java.util.logging.Level.parse(level)); + } + } + */ +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java new file mode 100644 index 00000000000..59f64ee493f --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/SolrReducer.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.UpdateConflictResolver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.ExceptionHandler; +import com.cloudera.cdk.morphline.base.FaultTolerance; + +/** + * This class loads the mapper's SolrInputDocuments into one EmbeddedSolrServer + * per reducer. Each such reducer and Solr server can be seen as a (micro) + * shard. The Solr servers store their data in HDFS. + * + * More specifically, this class consumes a list of <docId, SolrInputDocument> + * pairs, sorted by docId, and sends them to an embedded Solr server to generate + * a Solr index shard from the documents. 
+ */ +public class SolrReducer extends Reducer { + + private UpdateConflictResolver resolver; + private HeartBeater heartBeater; + private ExceptionHandler exceptionHandler; + + public static final String UPDATE_CONFLICT_RESOLVER = SolrReducer.class.getName() + ".updateConflictResolver"; + + private static final Logger LOG = LoggerFactory.getLogger(SolrReducer.class); + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + SolrRecordWriter.addReducerContext(context); + Class resolverClass = context.getConfiguration().getClass( + UPDATE_CONFLICT_RESOLVER, RetainMostRecentUpdateConflictResolver.class, UpdateConflictResolver.class); + + this.resolver = ReflectionUtils.newInstance(resolverClass, context.getConfiguration()); + /* + * Note that ReflectionUtils.newInstance() above also implicitly calls + * resolver.configure(context.getConfiguration()) if the resolver + * implements org.apache.hadoop.conf.Configurable + */ + + this.exceptionHandler = new FaultTolerance( + context.getConfiguration().getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false), + context.getConfiguration().getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false), + context.getConfiguration().get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())); + + this.heartBeater = new HeartBeater(context); + } + + protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { + heartBeater.needHeartBeat(); + try { + values = resolve(key, values, context); + super.reduce(key, values, context); + } catch (Exception e) { + LOG.error("Unable to process key " + key, e); + context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1); + exceptionHandler.handleException(e, null); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + private Iterable resolve( + final Text key, final Iterable values, final Context context) { + + if (resolver instanceof 
NoChangeUpdateConflictResolver) { + return values; // fast path + } + return new Iterable() { + @Override + public Iterator iterator() { + return new WrapIterator(resolver.orderUpdates(key, new UnwrapIterator(values.iterator()), context)); + } + }; + } + + @Override + protected void cleanup(Context context) throws IOException, InterruptedException { + heartBeater.close(); + super.cleanup(context); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class WrapIterator implements Iterator { + + private Iterator parent; + + private WrapIterator(Iterator parent) { + this.parent = parent; + } + + @Override + public boolean hasNext() { + return parent.hasNext(); + } + + @Override + public SolrInputDocumentWritable next() { + return new SolrInputDocumentWritable(parent.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class UnwrapIterator implements Iterator { + + private Iterator parent; + + private UnwrapIterator(Iterator parent) { + this.parent = parent; + } + + @Override + public boolean hasNext() { + return parent.hasNext(); + } + + @Override + public SolrInputDocument next() { + return parent.next().getSolrInputDocument(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java new file mode 100644 index 00000000000..d2efa96cdcf --- /dev/null +++ 
b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.StringReader; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; + +import net.sourceforge.argparse4j.ArgumentParsers; +import net.sourceforge.argparse4j.helper.ASCIITextWidthCounter; +import net.sourceforge.argparse4j.helper.TextHelper; + +import org.apache.hadoop.util.ToolRunner; + +/** + * Nicely formats the output of + * {@link ToolRunner#printGenericCommandUsage(PrintStream)} with the same look and feel that argparse4j uses for help text. 
+ */ +class ToolRunnerHelpFormatter { + + public static String getGenericCommandUsage() { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + String msg; + try { + ToolRunner.printGenericCommandUsage(new PrintStream(bout, true, "UTF-8")); + msg = new String(bout.toByteArray(), "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // unreachable + } + + BufferedReader reader = new BufferedReader(new StringReader(msg)); + StringBuilder result = new StringBuilder(); + while (true) { + String line; + try { + line = reader.readLine(); + } catch (IOException e) { + throw new RuntimeException(e); // unreachable + } + + if (line == null) { + return result.toString(); // EOS + } + + if (!line.startsWith("-")) { + result.append(line + "\n"); + } else { + line = line.trim(); + int i = line.indexOf(" "); + if (i < 0) { + i = line.indexOf('\t'); + } + if (i < 0) { + result.append(line + "\n"); + } else { + String title = line.substring(0, i).trim(); + if (title.length() >= 3 && Character.isLetterOrDigit(title.charAt(1)) && Character.isLetterOrDigit(title.charAt(2))) { + title = "-" + title; // prefer "--libjars" long arg style over "-libjars" style but retain "-D foo" short arg style + } + String help = line.substring(i, line.length()).trim(); + StringWriter strWriter = new StringWriter(); + PrintWriter writer = new PrintWriter(strWriter, true); + TextHelper.printHelp(writer, title, help, new ASCIITextWidthCounter(), ArgumentParsers.getFormatWidth()); + result.append(strWriter.toString()); + } + } + } + } +} + diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java new file mode 100644 index 00000000000..5e2fe86a6fe --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeMapper.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * For the meat see {@link TreeMergeOutputFormat}. 
+ */ +public class TreeMergeMapper extends Mapper { + + private static final Logger LOGGER = LoggerFactory.getLogger(TreeMergeMapper.class); + + public static final String MAX_SEGMENTS_ON_TREE_MERGE = "maxSegmentsOnTreeMerge"; + + @Override + protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + LOGGER.trace("map key: {}, value: {}", key, value); + context.write(value, NullWritable.get()); + } + +} \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java new file mode 100644 index 00000000000..26de0aaa42c --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.misc.IndexMergeTool; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; +import org.apache.solr.store.hdfs.HdfsDirectory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * See {@link IndexMergeTool}. + */ +public class TreeMergeOutputFormat extends FileOutputFormat { + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException { + Utils.getLogConfigFile(context.getConfiguration()); + Path workDir = getDefaultWorkFile(context, ""); + return new TreeMergeRecordWriter(context, workDir); + } + + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + private static final class TreeMergeRecordWriter extends RecordWriter { + + private final Path workDir; + private final List shards = new ArrayList(); + private final HeartBeater heartBeater; + private final TaskAttemptContext context; + + private static final Logger LOG = LoggerFactory.getLogger(TreeMergeRecordWriter.class); + + public TreeMergeRecordWriter(TaskAttemptContext context, Path workDir) { + this.workDir = new Path(workDir, "data/index"); + this.heartBeater = new 
HeartBeater(context); + this.context = context; + } + + @Override + public void write(Text key, NullWritable value) { + LOG.info("map key: {}", key); + heartBeater.needHeartBeat(); + try { + Path path = new Path(key.toString()); + shards.add(path); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + @Override + public void close(TaskAttemptContext context) throws IOException { + LOG.debug("Merging into dstDir: " + workDir + ", srcDirs: {}", shards); + heartBeater.needHeartBeat(); + try { + Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration()); + + IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null) + .setOpenMode(OpenMode.CREATE) + //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml? + //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml? + ; + + if (LOG.isDebugEnabled()) { + writerConfig.setInfoStream(System.out); + } +// writerConfig.setRAMBufferSizeMB(100); // improve performance +// writerConfig.setMaxThreadStates(1); + + // disable compound file to improve performance + // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html + // also see defaults in SolrIndexConfig + MergePolicy mergePolicy = writerConfig.getMergePolicy(); + LOG.debug("mergePolicy was: {}", mergePolicy); + if (mergePolicy instanceof TieredMergePolicy) { + ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0); +// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000); +// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000); +// ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000); + } else if (mergePolicy instanceof LogMergePolicy) { + ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0); + } + LOG.info("Using mergePolicy: {}", mergePolicy); + + IndexWriter writer = new IndexWriter(mergedIndex, writerConfig); + + Directory[] indexes = new Directory[shards.size()]; + for (int i = 0; i < shards.size(); i++) { + 
indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration()); + } + + context.setStatus("Logically merging " + shards.size() + " shards into one shard"); + LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir); + long start = System.currentTimeMillis(); + + writer.addIndexes(indexes); + // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) + // This can improve performance and turns this phase into a true "logical" merge, completing in constant time. + // See https://issues.apache.org/jira/browse/LUCENE-4746 + + if (LOG.isDebugEnabled()) { + context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + } + float secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Logical merge took {} secs", secs); + int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE); + context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments"); + LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments); + start = System.currentTimeMillis(); + if (maxSegments < Integer.MAX_VALUE) { + writer.forceMerge(maxSegments); + // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data + // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html + } + if (LOG.isDebugEnabled()) { + context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start); + } + secs = (System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs); + + start = System.currentTimeMillis(); + LOG.info("Optimizing Solr: Closing index writer"); + writer.close(); + secs = 
(System.currentTimeMillis() - start) / 1000.0f; + LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs); + context.setStatus("Done"); + } finally { + heartBeater.cancelHeartBeat(); + heartBeater.close(); + } + } + } +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java new file mode 100644 index 00000000000..1ad141a4264 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.hadoop; + +import java.io.BufferedReader; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; + +public class UnbufferedDataInputInputStream extends org.apache.solr.common.util.DataInputInputStream { + private final DataInputStream in; + + public UnbufferedDataInputInputStream(DataInput in) { + this.in = new DataInputStream(DataInputInputStream.constructInputStream(in)); + } + + @Override + public void readFully(byte[] b) throws IOException { + in.readFully(b); + } + + @Override + public void readFully(byte[] b, int off, int len) throws IOException { + in.readFully(b, off, len); + } + + @Override + public int skipBytes(int n) throws IOException { + return in.skipBytes(n); + } + + @Override + public boolean readBoolean() throws IOException { + return in.readBoolean(); + } + + @Override + public byte readByte() throws IOException { + return in.readByte(); + } + + @Override + public int readUnsignedByte() throws IOException { + return in.readUnsignedByte(); + } + + @Override + public short readShort() throws IOException { + return in.readShort(); + } + + @Override + public int readUnsignedShort() throws IOException { + return in.readUnsignedShort(); + } + + @Override + public char readChar() throws IOException { + return in.readChar(); + } + + @Override + public int readInt() throws IOException { + return in.readInt(); + } + + @Override + public long readLong() throws IOException { + return in.readLong(); + } + + @Override + public float readFloat() throws IOException { + return in.readFloat(); + } + + @Override + public double readDouble() throws IOException { + return in.readDouble(); + } + + @Override + public String readLine() throws IOException { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + return reader.readLine(); + } + + @Override + public String readUTF() throws IOException { + return in.readUTF(); + } + + @Override + 
public int read() throws IOException { + return in.read(); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java new file mode 100644 index 00000000000..c20d5784c0d --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/Utils.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import java.io.File; + +import org.apache.hadoop.conf.Configuration; +import org.apache.log4j.PropertyConfigurator; + +import com.google.common.annotations.Beta; + + +@Beta +public final class Utils { + + private static final String LOG_CONFIG_FILE = "hadoop.log4j.configuration"; + + public static void setLogConfigFile(File file, Configuration conf) { + conf.set(LOG_CONFIG_FILE, file.getName()); + } + + public static void getLogConfigFile(Configuration conf) { + String log4jPropertiesFile = conf.get(LOG_CONFIG_FILE); + if (log4jPropertiesFile != null) { + PropertyConfigurator.configure(log4jPropertiesFile); + } + } + + public static String getShortClassName(Class clazz) { + return getShortClassName(clazz.getName()); + } + + public static String getShortClassName(String className) { + int i = className.lastIndexOf('.'); // regular class + int j = className.lastIndexOf('$'); // inner class + return className.substring(1 + Math.max(i, j)); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java new file mode 100644 index 00000000000..ed916a33c93 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/ZooKeeperInspector.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.Aliases; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkCoreNodeProps; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.StrUtils; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Files; + +/** + * Extracts SolrCloud information from ZooKeeper. 
+ */ +final class ZooKeeperInspector { + + private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperInspector.class); + + public List> extractShardUrls(String zkHost, String collection) { + + DocCollection docCollection = extractDocCollection(zkHost, collection); + List slices = getSortedSlices(docCollection.getSlices()); + List> solrUrls = new ArrayList>(slices.size()); + for (Slice slice : slices) { + if (slice.getLeader() == null) { + throw new IllegalArgumentException("Cannot find SolrCloud slice leader. " + + "It looks like not all of your shards are registered in ZooKeeper yet"); + } + Collection replicas = slice.getReplicas(); + List urls = new ArrayList(replicas.size()); + for (Replica replica : replicas) { + ZkCoreNodeProps props = new ZkCoreNodeProps(replica); + urls.add(props.getCoreUrl()); + } + solrUrls.add(urls); + } + return solrUrls; + } + + public DocCollection extractDocCollection(String zkHost, String collection) { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + SolrZkClient zkClient = getZkClient(zkHost); + + try { + ZkStateReader zkStateReader = new ZkStateReader(zkClient); + try { + // first check for alias + collection = checkForAlias(zkClient, collection); + zkStateReader.createClusterStateWatchersAndUpdate(); + } catch (Exception e) { + throw new IllegalArgumentException("Cannot find expected information for SolrCloud in ZooKeeper: " + zkHost, e); + } + + try { + return zkStateReader.getClusterState().getCollection(collection); + } catch (SolrException e) { + throw new IllegalArgumentException("Cannot find collection '" + collection + "' in ZooKeeper: " + zkHost, e); + } + } finally { + zkClient.close(); + } + } + + public SolrZkClient getZkClient(String zkHost) { + if (zkHost == null) { + throw new IllegalArgumentException("zkHost must not be null"); + } + + SolrZkClient zkClient; + try { + zkClient = new SolrZkClient(zkHost, 30000); + } catch (Exception e) { + throw new 
IllegalArgumentException("Cannot connect to ZooKeeper: " + zkHost, e); + } + return zkClient; + } + + public List getSortedSlices(Collection slices) { + List sorted = new ArrayList(slices); + Collections.sort(sorted, new Comparator() { + @Override + public int compare(Slice slice1, Slice slice2) { + return slice1.getName().compareTo(slice2.getName()); + } + }); + return sorted; + } + + /** + * Returns config value given collection name + * Borrowed heavily from Solr's ZKController. + */ + public String readConfigName(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + if (collection == null) { + throw new IllegalArgumentException("collection must not be null"); + } + String configName = null; + + // first check for alias + collection = checkForAlias(zkClient, collection); + + String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; + if (LOG.isInfoEnabled()) { + LOG.info("Load collection config from:" + path); + } + byte[] data = zkClient.getData(path, null, null, true); + + if(data != null) { + ZkNodeProps props = ZkNodeProps.load(data); + configName = props.getStr(ZkController.CONFIGNAME_PROP); + } + + if (configName != null && !zkClient.exists(ZkController.CONFIGS_ZKNODE + "/" + configName, true)) { + LOG.error("Specified config does not exist in ZooKeeper:" + configName); + throw new IllegalArgumentException("Specified config does not exist in ZooKeeper:" + + configName); + } + + return configName; + } + + private String checkForAlias(SolrZkClient zkClient, String collection) + throws KeeperException, InterruptedException { + byte[] aliasData = zkClient.getData(ZkStateReader.ALIASES, null, null, true); + Aliases aliases = ClusterState.load(aliasData); + String alias = aliases.getCollectionAlias(collection); + if (alias != null) { + List aliasList = StrUtils.splitSmart(alias, ",", true); + if (aliasList.size() > 1) { + throw new IllegalArgumentException("collection cannot be an alias that maps to multiple 
collections"); + } + collection = aliasList.get(0); + } + return collection; + } + + /** + * Download and return the config directory from ZK + */ + public File downloadConfigDir(SolrZkClient zkClient, String configName) + throws IOException, InterruptedException, KeeperException { + File dir = Files.createTempDir(); + dir.deleteOnExit(); + ZkController.downloadConfigDir(zkClient, configName, dir); + File confDir = new File(dir, "conf"); + if (!confDir.isDirectory()) { + // create a temporary directory with "conf" subdir and mv the config in there. This is + // necessary because of CDH-11188; solrctl does not generate nor accept directories with e.g. + // conf/solrconfig.xml which is necessary for proper solr operation. This should work + // even if solrctl changes. + confDir = new File(Files.createTempDir().getAbsolutePath(), "conf"); + confDir.getParentFile().deleteOnExit(); + Files.move(dir, confDir); + dir = confDir.getParentFile(); + } + FileUtils.writeStringToFile(new File(dir, "solr.xml"), "", "UTF-8"); + return dir; + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java new file mode 100644 index 00000000000..0eae9405717 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/NoChangeUpdateConflictResolver.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; + +/** + * UpdateConflictResolver implementation that returns the solr documents in the + * same order as they are received on input, i.e. without change in order. + */ +public final class NoChangeUpdateConflictResolver implements UpdateConflictResolver { + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + return updates; + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java new file mode 100644 index 00000000000..60efb4c15bb --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RejectingUpdateConflictResolver.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Collections; +import java.util.Iterator; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; + +/** + * UpdateConflictResolver implementation that rejects multiple documents with + * the same key with an exception. + */ +public final class RejectingUpdateConflictResolver implements UpdateConflictResolver { + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + SolrInputDocument firstUpdate = null; + while (updates.hasNext()) { + if (firstUpdate == null) { + firstUpdate = updates.next(); + assert firstUpdate != null; + } else { + throw new IllegalArgumentException("Update conflict! Documents with the same unique key are forbidden: " + + key); + } + } + assert firstUpdate != null; + return Collections.singletonList(firstUpdate).iterator(); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java new file mode 100644 index 00000000000..1994c163dea --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/RetainMostRecentUpdateConflictResolver.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.HdfsFileFieldNames; +import org.apache.solr.hadoop.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * UpdateConflictResolver implementation that ignores all but the most recent + * document version, based on a configurable numeric Solr field, which defaults + * to the file_last_modified timestamp. 
+ */ +public class RetainMostRecentUpdateConflictResolver implements UpdateConflictResolver, Configurable { + + private Configuration conf; + private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT; + + public static final String ORDER_BY_FIELD_NAME_KEY = + RetainMostRecentUpdateConflictResolver.class.getName() + ".orderByFieldName"; + + public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED; + + public static final String COUNTER_GROUP = Utils.getShortClassName(RetainMostRecentUpdateConflictResolver.class); + public static final String DUPLICATES_COUNTER_NAME = "Number of documents ignored as duplicates"; + public static final String OUTDATED_COUNTER_NAME = "Number of documents ignored as outdated"; + + private static final Logger LOG = LoggerFactory.getLogger(RetainMostRecentUpdateConflictResolver.class); + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName); + } + + @Override + public Configuration getConf() { + return conf; + } + + protected String getOrderByFieldName() { + return orderByFieldName; + } + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + return getMaximum(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator(), ctx); + } + + /** Returns the most recent document among the colliding updates */ + protected Iterator getMaximum(Iterator updates, String fieldName, + Comparator child, Context context) { + + SolrInputDocumentComparator comp = new SolrInputDocumentComparator(fieldName, child); + SolrInputDocument max = null; + long numDupes = 0; + long numOutdated = 0; + while (updates.hasNext()) { + SolrInputDocument next = updates.next(); + assert next != null; + if (max == null) { + max = next; + } else { + int c = comp.compare(next, max); + if (c == 0) { + LOG.debug("Ignoring document version because it is a duplicate: {}", next); + 
numDupes++; + } else if (c > 0) { + LOG.debug("Ignoring document version because it is outdated: {}", max); + max = next; + numOutdated++; + } else { + LOG.debug("Ignoring document version because it is outdated: {}", next); + numOutdated++; + } + } + } + + assert max != null; + if (numDupes > 0) { + context.getCounter(COUNTER_GROUP, DUPLICATES_COUNTER_NAME).increment(numDupes); + } + if (numOutdated > 0) { + context.getCounter(COUNTER_GROUP, OUTDATED_COUNTER_NAME).increment(numOutdated); + } + return Collections.singletonList(max).iterator(); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java new file mode 100644 index 00000000000..e8cfdbb52e4 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SolrInputDocumentComparator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop.dedup; + +import java.util.Comparator; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; + +/** + * Default mechanism of determining which of two Solr documents with the same + * key is the more recent version. + */ +public final class SolrInputDocumentComparator implements Comparator { + + private Comparator child; + private String fieldName; + + SolrInputDocumentComparator(String fieldName, Comparator child) { + this.child = child; + this.fieldName = fieldName; + } + + @Override + public int compare(SolrInputDocument doc1, SolrInputDocument doc2) { + SolrInputField f1 = doc1.getField(fieldName); + SolrInputField f2 = doc2.getField(fieldName); + if (f1 == f2) { + return 0; + } else if (f1 == null) { + return -1; + } else if (f2 == null) { + return 1; + } + + Object v1 = f1.getFirstValue(); + Object v2 = f2.getFirstValue(); + return child.compare(v1, v2); + } + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + public static final class TimeStampComparator implements Comparator { + + @Override + public int compare(Object v1, Object v2) { + if (v1 == v2) { + return 0; + } else if (v1 == null) { + return -1; + } else if (v2 == null) { + return 1; + } + long t1 = getLong(v1); + long t2 = getLong(v2); + return (t1 < t2 ? -1 : (t1==t2 ? 
0 : 1)); + } + + private long getLong(Object v) { + if (v instanceof Long) { + return ((Long) v).longValue(); + } else { + return Long.parseLong(v.toString()); + } + } + + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java new file mode 100644 index 00000000000..24ea9363801 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop.dedup; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.HdfsFileFieldNames; + +/** + * UpdateConflictResolver implementation that orders colliding updates ascending + * from least recent to most recent (partial) update, based on a configurable + * numeric Solr field, which defaults to the file_last_modified timestamp. + */ +public class SortingUpdateConflictResolver implements UpdateConflictResolver, Configurable { + + private Configuration conf; + private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT; + + public static final String ORDER_BY_FIELD_NAME_KEY = + SortingUpdateConflictResolver.class.getName() + ".orderByFieldName"; + + public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName); + } + + @Override + public Configuration getConf() { + return conf; + } + + protected String getOrderByFieldName() { + return orderByFieldName; + } + + @Override + public Iterator orderUpdates(Text key, Iterator updates, Context ctx) { + return sort(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator()); + } + + protected Iterator sort(Iterator updates, String fieldName, Comparator child) { + // TODO: use an external merge sort in the pathological case where there are a huge amount of collisions + List sortedUpdates = new ArrayList(1); + while (updates.hasNext()) { + sortedUpdates.add(updates.next()); + } + if (sortedUpdates.size() > 1) { // conflicts are rare + 
Collections.sort(sortedUpdates, new SolrInputDocumentComparator(fieldName, child)); + } + return sortedUpdates.iterator(); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java new file mode 100644 index 00000000000..94e23e134eb --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.dedup; + +import java.util.Iterator; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.solr.common.SolrInputDocument; + +/** + * Interface that enables deduplication and ordering of a series of document + * updates for the same unique document key. + * + * For example, a MapReduce batch job might index multiple files in the same job + * where some of the files contain old and new versions of the very same + * document, using the same unique document key. 
+ * + * Typically, implementations of this interface forbid collisions by throwing an + * exception, or ignore all but the most recent document version, or, in the + * general case, order colliding updates ascending from least recent to most + * recent (partial) update. + * + * The caller of this interface (i.e. the Hadoop Reducer) will then apply the + * updates to Solr in the order returned by the orderUpdates() method. + * + * Configuration: If an UpdateConflictResolver implementation also implements + * {@link Configurable} then the Hadoop Reducer will call + * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on + * instance construction and pass the standard Hadoop configuration information. + */ +public interface UpdateConflictResolver { + + /** + * Given a list of all colliding document updates for the same unique document + * key, this method returns zero or more documents in an application specific + * order. + * + * The caller will then apply the updates for this key to Solr in the order + * returned by the orderUpdate() method. + * + * @param uniqueKey + * the document key common to all collidingUpdates mentioned below + * @param collidingUpdates + * all updates in the MapReduce job that have a key equal to + * {@code uniqueKey} mentioned above. The input order is unspecified. + * @param context + * The Context passed from the {@link Reducer} + * implementations. + * @return the order in which the updates shall be applied to Solr + */ + Iterator orderUpdates( + Text uniqueKey, Iterator collidingUpdates, Context context); + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html new file mode 100644 index 00000000000..5543f0262be --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/dedup/package.html @@ -0,0 +1,22 @@ + + + + +Dedupe related code. 
+ + diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java new file mode 100644 index 00000000000..5ba98ff3968 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop.morphline; + +import org.apache.solr.hadoop.Utils; + +public enum MorphlineCounters { + + FILES_READ (getClassName(MorphlineMapper.class) + ": Number of files read"), + + FILE_BYTES_READ (getClassName(MorphlineMapper.class) + ": Number of file bytes read"), + + DOCS_READ (getClassName(MorphlineMapper.class) + ": Number of documents read"), + + PARSER_OUTPUT_BYTES (getClassName(MorphlineMapper.class) + ": Number of document bytes generated by Tika parser"), + + ERRORS (getClassName(MorphlineMapper.class) + ": Number of errors"); + + private final String label; + + private MorphlineCounters(String label) { + this.label = label; + } + + public String toString() { + return label; + } + + private static String getClassName(Class clazz) { + return Utils.getShortClassName(clazz); + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java new file mode 100644 index 00000000000..606ac05fd2e --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.morphline; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.mapreduce.Mapper.Context; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.hadoop.HdfsFileFieldNames; +import org.apache.solr.hadoop.PathParts; +import org.apache.solr.hadoop.Utils; +import org.apache.solr.morphlines.solr.DocumentLoader; +import org.apache.solr.morphlines.solr.SolrLocator; +import org.apache.solr.morphlines.solr.SolrMorphlineContext; +import org.apache.solr.schema.IndexSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cloudera.cdk.morphline.api.Command; +import com.cloudera.cdk.morphline.api.MorphlineCompilationException; +import com.cloudera.cdk.morphline.api.MorphlineContext; +import com.cloudera.cdk.morphline.api.Record; +import com.cloudera.cdk.morphline.base.Compiler; +import com.cloudera.cdk.morphline.base.FaultTolerance; +import com.cloudera.cdk.morphline.base.Fields; +import com.cloudera.cdk.morphline.base.Metrics; +import com.cloudera.cdk.morphline.base.Notifications; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.google.common.annotations.Beta; +import com.google.common.base.Joiner; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + +/** + * Internal helper for {@link MorphlineMapper} and dryRun mode; This API is for *INTERNAL* use only + * and should not be considered public. 
+ */ +@Beta +public final class MorphlineMapRunner { + + private MorphlineContext morphlineContext; + private Command morphline; + private IndexSchema schema; + private Map commandLineMorphlineHeaders; /* record fields forced from the job configuration; keys are the morphlineField.NAME settings with the prefix removed */ + private boolean disableFileOpen; + private String morphlineFileAndId; /* "file@id", used in the warning logged when a record fails */ + private final Timer elapsedTime; /* started at the top of every map() call and stopped in its finally block */ + + /** Configuration key naming the morphline file to compile; the constructor rejects a missing or blank value. */ public static final String MORPHLINE_FILE_PARAM = "morphlineFile"; + /** Configuration key whose value is passed to the compiler as the morphline id. */ public static final String MORPHLINE_ID_PARAM = "morphlineId"; + + /** + * Morphline variables can be passed from the CLI to the Morphline, e.g.: + * hadoop ... -D morphlineVariable.zkHost=127.0.0.1:2181/solr + */ + public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable"; + + /** + * Headers, including MIME types, can also explicitly be passed by force from the CLI to Morphline, e.g: + * hadoop ... -D morphlineField._attachment_mimetype=text/csv + */ + public static final String MORPHLINE_FIELD_PREFIX = "morphlineField."; + + /** + * Flag to disable reading of file contents if indexing just file metadata is sufficient. + * This improves performance and confidentiality. 
+ */ + public static final String DISABLE_FILE_OPEN = "morphlineDisableFileOpen"; + + private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapRunner.class); + + /** Returns the morphline context, i.e. the one rebuilt with the index schema at construction time. */ MorphlineContext getMorphlineContext() { + return morphlineContext; + } + + /** Returns the index schema obtained from the SolrLocator in the constructor. */ IndexSchema getSchema() { + return schema; + } + + /** Builds a runner from the job configuration: creates the morphline context (first without, then with the index schema loaded through a SolrLocator for solrHomeDir), compiles the morphline named by morphlineFile and morphlineId with any morphlineVariable.NAME settings as overrides, reads morphlineDisableFileOpen, collects the morphlineField.NAME forced fields, registers the elapsed-time timer and notifies the morphline that a transaction begins. Throws MorphlineCompilationException when morphlineFile is missing or blank. */ public MorphlineMapRunner(Configuration configuration, DocumentLoader loader, String solrHomeDir) throws IOException { + if (LOG.isTraceEnabled()) { + LOG.trace("CWD is {}", new File(".").getCanonicalPath()); + /* TreeMap so the configuration dump is ordered by key */ TreeMap map = new TreeMap(); + for (Map.Entry entry : configuration) { + map.put(entry.getKey(), entry.getValue()); + } + LOG.trace("Configuration:\n{}", Joiner.on("\n").join(map.entrySet())); + } + + FaultTolerance faultTolerance = new FaultTolerance( + configuration.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false), + configuration.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false), + configuration.get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName()) + ); + + /* first context has no schema yet; it is what the locator below is constructed with */ morphlineContext = new SolrMorphlineContext.Builder() + .setDocumentLoader(loader) + .setExceptionHandler(faultTolerance) + .setMetricRegistry(new MetricRegistry()) + .build(); + + class MySolrLocator extends SolrLocator { // trick to access protected ctor + public MySolrLocator(MorphlineContext ctx) { + super(ctx); + } + } + + SolrLocator locator = new MySolrLocator(morphlineContext); + locator.setSolrHomeDir(solrHomeDir); + schema = locator.getIndexSchema(); + + // rebuild context, now with schema (the metric registry of the first context is reused) + morphlineContext = new SolrMorphlineContext.Builder() + .setIndexSchema(schema) + .setDocumentLoader(loader) + .setExceptionHandler(faultTolerance) + .setMetricRegistry(morphlineContext.getMetricRegistry()) + .build(); + + String morphlineFile = configuration.get(MORPHLINE_FILE_PARAM); + String morphlineId = configuration.get(MORPHLINE_ID_PARAM); + if (morphlineFile == null || morphlineFile.trim().length() == 0) { + throw new 
MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null); + } + /* collect the morphlineVariable.NAME settings, keyed by NAME, as compiler overrides */ Map morphlineVariables = new HashMap(); + /* NOTE(review): with a raw Map.Entry, getKey() is an Object and has no startsWith(); the generic type arguments look stripped from this listing (presumably String to String) - confirm against the original file */ for (Map.Entry entry : configuration) { + String variablePrefix = MORPHLINE_VARIABLE_PARAM + "."; + if (entry.getKey().startsWith(variablePrefix)) { + morphlineVariables.put(entry.getKey().substring(variablePrefix.length()), entry.getValue()); + } + } + Config override = ConfigFactory.parseMap(morphlineVariables); + morphline = new Compiler().compile(new File(morphlineFile), morphlineId, morphlineContext, null, override); + morphlineFileAndId = morphlineFile + "@" + morphlineId; + + disableFileOpen = configuration.getBoolean(DISABLE_FILE_OPEN, false); + LOG.debug("disableFileOpen: {}", disableFileOpen); + + /* collect the morphlineField.NAME settings, keyed by NAME; map() writes them into every record */ commandLineMorphlineHeaders = new HashMap(); + for (Map.Entry entry : configuration) { + if (entry.getKey().startsWith(MORPHLINE_FIELD_PREFIX)) { + commandLineMorphlineHeaders.put(entry.getKey().substring(MORPHLINE_FIELD_PREFIX.length()), entry.getValue()); + } + } + LOG.debug("Headers, including MIME types, passed by force from the CLI to morphline: {}", commandLineMorphlineHeaders); + + String metricName = MetricRegistry.name(Utils.getShortClassName(getClass()), Metrics.ELAPSED_TIME); + this.elapsedTime = morphlineContext.getMetricRegistry().timer(metricName); + Notifications.notifyBeginTransaction(morphline); + } + + /** + * Runs the file whose path is given in value through the morphline and, when context is non-null, updates the FILES_READ and FILE_BYTES_READ counters. Exceptions are logged, counted per exception class and handed to the morphline context's exception handler. 
+ */ + public void map(String value, Configuration configuration, Context context) throws IOException { + LOG.info("Processing file {}", value); + InputStream in = null; + Record record = null; + Timer.Context timerContext = elapsedTime.time(); + try { + PathParts parts = new PathParts(value.toString(), configuration); + record = getRecord(parts); + if (record == null) { + return; // ignore + } + /* forced fields replace whatever getRecord() stored under the same name */ for (Map.Entry entry : commandLineMorphlineHeaders.entrySet()) { + record.replaceValues(entry.getKey(), entry.getValue()); + } + long fileLength = parts.getFileStatus().getLen(); + /* metadata-only mode: the morphline receives an empty body instead of the file contents */ if (disableFileOpen) { + in = new ByteArrayInputStream(new byte[0]); + } else { + in = new BufferedInputStream(parts.getFileSystem().open(parts.getUploadPath())); + } + record.put(Fields.ATTACHMENT_BODY, in); + Notifications.notifyStartSession(morphline); + if (!morphline.process(record)) { + LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record); + } + /* counters are bumped even when process() returned false; context is null-checked, presumably for the dryRun mode named in the class javadoc - confirm */ if (context != null) { + context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILES_READ.toString()).increment(1); + context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILE_BYTES_READ.toString()).increment(fileLength); + } + } catch (Exception e) { + LOG.error("Unable to process file " + value, e); + if (context != null) { + context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1); + } + morphlineContext.getExceptionHandler().handleException(e, record); + } finally { + timerContext.stop(); + if (in != null) { + in.close(); + } + } + } + + /** Builds the initial record from the path parts and the file's FileStatus; returns null, after logging a warning, when the status is null or reading it throws IOException. */ protected Record getRecord(PathParts parts) { + FileStatus stats; + try { + stats = parts.getFileStatus(); + } catch (IOException e) { + stats = null; + } + if (stats == null) { + LOG.warn("Ignoring file that somehow has become unavailable since the job was submitted: {}", + parts.getUploadURL()); + return null; + } + + Record headers = new Record(); + //headers.put(getSchema().getUniqueKeyField().getName(), 
parts.getId()); // use HDFS file path as docId if no docId is specified + headers.put(Fields.BASE_ID, parts.getId()); // with sanitizeUniqueKey command, use HDFS file path as docId if no docId is specified + headers.put(Fields.ATTACHMENT_NAME, parts.getName()); // Tika can use the file name in guessing the right MIME type + + // enable indexing and storing of file meta data in Solr + headers.put(HdfsFileFieldNames.FILE_UPLOAD_URL, parts.getUploadURL()); + headers.put(HdfsFileFieldNames.FILE_DOWNLOAD_URL, parts.getDownloadURL()); + headers.put(HdfsFileFieldNames.FILE_SCHEME, parts.getScheme()); + headers.put(HdfsFileFieldNames.FILE_HOST, parts.getHost()); + headers.put(HdfsFileFieldNames.FILE_PORT, String.valueOf(parts.getPort())); + headers.put(HdfsFileFieldNames.FILE_PATH, parts.getURIPath()); + headers.put(HdfsFileFieldNames.FILE_NAME, parts.getName()); + headers.put(HdfsFileFieldNames.FILE_LAST_MODIFIED, String.valueOf(stats.getModificationTime())); // FIXME also add in SpoolDirectorySource + headers.put(HdfsFileFieldNames.FILE_LENGTH, String.valueOf(stats.getLen())); // FIXME also add in SpoolDirectorySource + headers.put(HdfsFileFieldNames.FILE_OWNER, stats.getOwner()); + headers.put(HdfsFileFieldNames.FILE_GROUP, stats.getGroup()); + headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_USER, stats.getPermission().getUserAction().SYMBOL); + headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_GROUP, stats.getPermission().getGroupAction().SYMBOL); + headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_OTHER, stats.getPermission().getOtherAction().SYMBOL); + headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_STICKYBIT, String.valueOf(stats.getPermission().getStickyBit())); + // TODO: consider adding stats.getAccessTime(), stats.getReplication(), stats.isSymlink(), stats.getBlockSize() + + return headers; + } + + /** Notifies the morphline of the transaction commit and then of shutdown. */ public void cleanup() { + Notifications.notifyCommitTransaction(morphline); + Notifications.notifyShutdown(morphline); + } + +} diff --git 
a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java new file mode 100644 index 00000000000..8ded6041547 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop.morphline; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.response.SolrPingResponse; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.hadoop.HeartBeater; +import org.apache.solr.hadoop.SolrInputDocumentWritable; +import org.apache.solr.hadoop.SolrMapper; +import org.apache.solr.morphlines.solr.DocumentLoader; +import org.apache.solr.schema.IndexSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Counting; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; + +/** + * This class takes the input files, extracts the relevant content, transforms + * it and hands SolrInputDocuments to a set of reducers. + * + * More specifically, it consumes a list of <offset, hdfsFilePath> input pairs. + * For each such pair extracts a set of zero or more SolrInputDocuments and + * sends them to a downstream Reducer. The key for the reducer is the unique id + * of the SolrInputDocument specified in Solr schema.xml. 
+ */ +public class MorphlineMapper extends SolrMapper { + + private Context context; /* saved in setup(); used by the nested document loader and by addCounting() */ + private MorphlineMapRunner runner; /* created in setup() */ + private HeartBeater heartBeater; /* created in setup(), closed in cleanup() */ + + private static final Logger LOG = LoggerFactory.getLogger(MorphlineMapper.class); + + /** Returns the index schema held by the runner. */ protected IndexSchema getSchema() { + return runner.getSchema(); + } + + /** Returns the context that was passed to setup(). */ protected Context getContext() { + return context; + } + + /** Stores the context, creates the heart beater and builds the MorphlineMapRunner from the job configuration, a MyDocumentLoader and the Solr home directory. */ @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + this.context = context; + heartBeater = new HeartBeater(context); + this.runner = new MorphlineMapRunner( + context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString()); + } + + /** + * Passes the path held in value to the runner; the key is not used. A heart beat is requested before the call and cancelled afterwards, even on failure. + */ + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + heartBeater.needHeartBeat(); + try { + runner.map(value.toString(), context.getConfiguration(), context); + } finally { + heartBeater.cancelHeartBeat(); + } + } + + /** Closes the heart beater, cleans up the runner, copies the morphline metrics into job counters and finally calls super.cleanup(). */ @Override + protected void cleanup(Context context) throws IOException, InterruptedException { + heartBeater.close(); + runner.cleanup(); + addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry(), context); + super.cleanup(context); + } + + /** Adds getCount() of every counter, histogram, meter and timer in the registry to a job counter of the same name in group "morphline". NOTE(review): the context parameter is not used here; addCounting() writes through the context field. NOTE(review): timers are divided by nanosPerMilliSec although the value is getCount(), which suggests a duration was intended - confirm what getCount() returns for a Timer. */ private void addMetricsToMRCounters(MetricRegistry metricRegistry, Context context) { + for (Map.Entry entry : metricRegistry.getCounters().entrySet()) { + addCounting(entry.getKey(), entry.getValue(), 1); + } + for (Map.Entry entry : metricRegistry.getHistograms().entrySet()) { + addCounting(entry.getKey(), entry.getValue(), 1); + } + for (Map.Entry entry : metricRegistry.getMeters().entrySet()) { + addCounting(entry.getKey(), entry.getValue(), 1); + } + for (Map.Entry entry : metricRegistry.getTimers().entrySet()) { + long nanosPerMilliSec = 1000 * 1000; + addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec); + } + } + + /** Increments the "morphline" group counter named metricName by value.getCount() divided by scale (integer division). */ private void addCounting(String 
metricName, Counting value, long scale) { + context.getCounter("morphline", metricName).increment(value.getCount() / scale); + } + + /////////////////////////////////////////////////////////////////////////////// + // Nested classes: + /////////////////////////////////////////////////////////////////////////////// + /** DocumentLoader that writes each document to the map output keyed by its unique key value; the transaction and shutdown callbacks are empty and rollback/ping return freshly constructed responses. */ private final class MyDocumentLoader implements DocumentLoader { + + @Override + public void beginTransaction() { + } + + /** Writes doc to the map output under the string form of its unique key and increments DOCS_READ; throws IllegalArgumentException when the unique key field has no value. */ @Override + public void load(SolrInputDocument doc) throws IOException, SolrServerException { + String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName(); + Object id = doc.getFieldValue(uniqueKeyFieldName); + if (id == null) { + throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName + + " (see Solr schema.xml)"); + } + try { + context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc)); + } catch (InterruptedException e) { + throw new IOException("Interrupted while writing " + doc, e); + } + + /* PARSER_OUTPUT_BYTES is only accumulated when debug logging is enabled */ if (LOG.isDebugEnabled()) { + long numParserOutputBytes = 0; + for (SolrInputField field : doc.values()) { + numParserOutputBytes += sizeOf(field.getValue()); + } + context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes); + } + context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1); + } + + // just an approximation: length in chars for text, 4 or 8 for Integer or Long, the sum of the elements for a Collection, otherwise the length of String.valueOf(value) + private long sizeOf(Object value) { + if (value instanceof CharSequence) { + return ((CharSequence) value).length(); + } else if (value instanceof Integer) { + return 4; + } else if (value instanceof Long) { + return 8; + } else if (value instanceof Collection) { + long size = 0; + for (Object val : (Collection) value) { + size += sizeOf(val); + } + return size; + } else { + return String.valueOf(value).length(); + } + } + + @Override + public void commitTransaction() { + } + + @Override + public UpdateResponse 
rollbackTransaction() throws SolrServerException, IOException { + /* nothing is rolled back here; a new, empty response is returned */ return new UpdateResponse(); + } + + @Override + public void shutdown() { + } + + @Override + public SolrPingResponse ping() throws SolrServerException, IOException { + /* no server is contacted here; a new, empty response is returned */ return new SolrPingResponse(); + } + + } + +} diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html new file mode 100644 index 00000000000..9597a15d4f5 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/morphline/package.html @@ -0,0 +1,22 @@ + + + + +Morphlines related code. + + diff --git a/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html new file mode 100644 index 00000000000..c90c7a24775 --- /dev/null +++ b/solr/contrib/solr-mr/src/java/org/apache/solr/hadoop/package.html @@ -0,0 +1,22 @@ + + + + +{@link org.apache.solr.hadoop.MapReduceIndexerTool} and related code. 
+ + diff --git a/solr/contrib/solr-mr/src/java/overview.html b/solr/contrib/solr-mr/src/java/overview.html new file mode 100644 index 00000000000..c97f378ca2e --- /dev/null +++ b/solr/contrib/solr-mr/src/java/overview.html @@ -0,0 +1,21 @@ + + + +Apache Solr Search Server: Solr MapReduce index building contrib + + diff --git a/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml b/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml new file mode 100644 index 00000000000..6891e42d616 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/custom-mimetypes.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git 
a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and 
sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the Dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file is removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building your own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. 
お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 
0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. 
+#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. 
(楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. 
+# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. 
ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. 
[)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre 
+donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen 
+podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 
00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه 
+ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | 
these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt new file mode 
100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido 
+tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती 
+होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan 
+jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt 
b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if 
+perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt 
b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika 
+tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. 
of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed 
under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | 
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# 
Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/lang/userdict_ja.txt 
@@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml new file mode 100644 index 00000000000..ae2c56d18ae --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/schema.xml @@ -0,0 +1,947 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + 
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + 
,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. 
+# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. 
インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. 
+#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. 
This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 
ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. 
ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. 
[)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs 
+durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude 
+již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 
--- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد 
+اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin 
noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt new file mode 100644 
index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un 
+unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized 
forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan 
+janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt new 
file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che 
| who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt new file mode 100644 index 
00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikÄm +tikÄt 
+tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. 
of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD 
License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | sÃ¥ = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +pÃ¥ | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +sÃ¥ | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +dÃ¥ | then, when +sin | his +nu | now +har | have +inte | inte nÃ¥gon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +nÃ¥got | some etc +frÃ¥n | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +nÃ¥gon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +Ã¥t | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +nÃ¥gra | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | 
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# 
merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user 
dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新èž,日本 経済 æ–°èž,ニホン ケイザイ シンブン,カスタムåè©ž +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタムåè©ž + +# Custom segmentation for compound katakana +トートãƒãƒƒã‚°,トート ãƒãƒƒã‚°,トート ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž +ショルダーãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ショルダー ãƒãƒƒã‚°,ã‹ãšã‚«ãƒŠåè©ž + +# Custom reading for former sumo wrestler +æœé’é¾,æœé’é¾,アサショウリュウ,カスタム人å diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml new file mode 100644 index 00000000000..65192efe442 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml new file mode 100644 index 00000000000..beff1b2af0a --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/solrconfig.xml @@ -0,0 +1,1784 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + 
+ + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name 
+ id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/stopwords.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/synonyms.txt b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-mr/src/test-files/solr/minimr/solr.xml b/solr/contrib/solr-mr/src/test-files/solr/minimr/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/minimr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of 
French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer 
+# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 
通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. 
ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. 
ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. 
ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. 
ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. 
ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. 
+#未知語 +# +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs 
+durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude 
+již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +pÃ¥ | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +nÃ¥r | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +ogsÃ¥ | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sÃ¥dan | such, like this/like that diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 
--- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد 
+اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin 
noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt new file mode 100644 
index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un 
+unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized 
forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan 
+janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt new 
file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che 
| who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt new file mode 100644 index 
00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s +bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikÄm +tikÄt 
+tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. 
of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD 
License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er |
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# 
merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt new file mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user 
dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number of tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/schema.xml new file mode 100644 index 00000000000..b133c135f31 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/schema.xml @@ -0,0 +1,961 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/solrconfig.xml 
b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/solrconfig.xml new file mode 100644 index 00000000000..f9683b27db7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/solrconfig.xml @@ -0,0 +1,1789 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + 
text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/stopwords.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/synonyms.txt b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-mr/src/test-files/solr/mrunit/solr.xml b/solr/contrib/solr-mr/src/test-files/solr/mrunit/solr.xml new file mode 100644 index 00000000000..6c8b43f75ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/mrunit/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solr.xml b/solr/contrib/solr-mr/src/test-files/solr/solr.xml new file mode 100644 index 00000000000..4604f60476f --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solr.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + ${socketTimeout:120000} + ${connTimeout:15000} + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/currency.xml b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/currency.xml new file mode 100644 index 00000000000..3a9c58afee8 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml new file mode 100644 index 00000000000..25d5cebe4fb --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt new file mode 100644 index 00000000000..307a85f913d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt new file mode 100644 index 00000000000..722db588333 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_fr.txt @@ -0,0 +1,9 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt new file mode 100644 index 00000000000..9ebe7fa349a --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt new file mode 100644 index 00000000000..cac04095372 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt new file mode 100644 index 00000000000..4d2642cc5a3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt new file mode 100644 index 00000000000..441072971d3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git 
a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt new file mode 100644 index 00000000000..71b750845e3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#åè©ž +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#åè©ž-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#åè©ž-固有åè©ž +# +# noun-proper-misc: miscellaneous proper nouns +#åè©ž-固有åè©ž-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#åè©ž-固有åè©ž-人å +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. ãŠå¸‚ã®æ–¹ +#åè©ž-固有åè©ž-人å-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#åè©ž-固有åè©ž-人å-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#åè©ž-固有åè©ž-人å-å +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産çœ, NHK +#åè©ž-固有åè©ž-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#åè©ž-固有åè©ž-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. 
アジア, ãƒãƒ«ã‚»ãƒ­ãƒŠ, 京都 +#åè©ž-固有åè©ž-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#åè©ž-固有åè©ž-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#åè©ž-代åè©ž +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. ãã‚Œ, ã“ã“, ã‚ã„ã¤, ã‚ãªãŸ, ã‚ã¡ã“ã¡, ã„ãã¤, ã©ã“ã‹, ãªã«, ã¿ãªã•ã‚“, ã¿ã‚“ãª, ã‚ãŸãã—, ã‚ã‚Œã‚ã‚Œ +#åè©ž-代åè©ž-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ã‚りゃ, ã“りゃ, ã“りゃã‚, ãりゃ, ãりゃ゠+#åè©ž-代åè©ž-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, åˆå¾Œ, å°‘é‡ +#åè©ž-副詞å¯èƒ½ +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (ã™ã‚‹, ã§ãã‚‹, ãªã•ã‚‹, ãã ã•ã‚‹) +# e.g. インプット, æ„›ç€, 悪化, 悪戦苦闘, 一安心, 下å–ã‚Š +#åè©ž-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na") +# e.g. å¥åº·, 安易, 駄目, ã ã‚ +#åè©ž-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), æ•°. +# e.g. 0, 1, 2, 何, æ•°, å¹¾ +#åè©ž-æ•° +# +# noun-affix: noun affixes where the sub-classification is undefined +#åè©ž-éžè‡ªç«‹ +# +# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. ã‚ã‹ã¤ã, æš, ã‹ã„, 甲æ–, æ°—, ãらã„, å«Œã„, ãã›, ç™–, ã“ã¨, 事, ã”ã¨, 毎, ã—ã ã„, 次第, +# é †, ã›ã„, 所為, ã¤ã„ã§, åºã§, ã¤ã‚‚ã‚Š, ç©ã‚‚ã‚Š, 点, ã©ã“ã‚, ã®, ã¯ãš, ç­ˆ, ã¯ãšã¿, å¼¾ã¿, +# æ‹å­, ãµã†, ãµã‚Š, 振り, ã»ã†, æ–¹, æ—¨, ã‚‚ã®, 物, 者, ゆãˆ, æ•…, ゆãˆã‚“, 所以, ã‚ã‘, 訳, +# ã‚ã‚Š, 割り, 割, ã‚“-å£èªž/, ã‚‚ã‚“-å£èªž/ +#åè©ž-éžè‡ªç«‹-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. 
ã‚ã„ã , é–“, ã‚ã’ã, 挙ã’å¥, ã‚ã¨, 後, 余り, 以外, 以é™, 以後, 以上, 以å‰, 一方, ã†ãˆ, +# 上, ã†ã¡, 内, ãŠã‚Š, 折り, ã‹ãŽã‚Š, é™ã‚Š, ãã‚Š, ã£ãã‚Š, çµæžœ, ã“ã‚, é ƒ, ã•ã„, éš›, 最中, ã•ãªã‹, +# 最中, ã˜ãŸã„, 自体, ãŸã³, 度, ãŸã‚, 為, ã¤ã©, 都度, ã¨ãŠã‚Š, 通り, ã¨ã, 時, ã¨ã“ã‚, 所, +# ã¨ãŸã‚“, 途端, ãªã‹, 中, ã®ã¡, 後, ã°ã‚ã„, å ´åˆ, æ—¥, ã¶ã‚“, 分, ã»ã‹, ä»–, ã¾ãˆ, å‰, ã¾ã¾, +# 儘, ä¾­, ã¿ãŽã‚Š, 矢先 +#åè©ž-éžè‡ªç«‹-副詞å¯èƒ½ +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よã†(ã ) ("you(da)"). +# e.g. よã†, ã‚„ã†, 様 (よã†) +#åè©ž-éžè‡ªç«‹-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form 㪠(aux "da"). +# e.g. ã¿ãŸã„, ãµã† +#åè©ž-éžè‡ªç«‹-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#åè©ž-特殊 +# +# noun-special-aux: The ãã†ã  ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. ãㆠ+#åè©ž-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#åè©ž-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. ãŠã, ã‹ãŸ, æ–¹, ç”²æ– (ãŒã„), ãŒã‹ã‚Š, ãŽã¿, 気味, ãã‚‹ã¿, (~ã—ãŸ) ã•, 次第, 済 (ãš) ã¿, +# よã†, (ã§ã)ã£ã“, æ„Ÿ, 観, 性, å­¦, é¡ž, é¢, 用 +#åè©ž-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. å›, 様, è‘— +#åè©ž-接尾-人å +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 
町, 市, 県 +#åè©ž-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分ã‘, 入り, è½ã¡, è²·ã„ +#åè©ž-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of ãã†ã  (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. ãㆠ+#åè©ž-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula ã  ("da"). +# e.g. çš„, ã’, ãŒã¡ +#åè©ž-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ã”), 以後, 以é™, 以å‰, å‰å¾Œ, 中, 末, 上, 時 (ã˜) +#åè©ž-接尾-副詞å¯èƒ½ +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, ã¤, 本, 冊, パーセント, cm, kg, カ月, ã‹å›½, 区画, 時間, æ™‚åŠ +#åè©ž-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽ã—) ã•, (考ãˆ) æ–¹ +#åè©ž-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) å…¼ (主婦) +#åè©ž-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are +# semantically verb-like. +# e.g. ã”らん, ã”覧, 御覧, 頂戴 +#åè©ž-å‹•è©žéžè‡ªç«‹çš„ +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for åè©ž 引用文字列 ("noun quotation") +# is ã„ã‚ã ("iwaku"). +#åè©ž-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ãªã„ ("nai") and +# behave like an adjective. +# e.g. 
申ã—訳, 仕方, ã¨ã‚“ã§ã‚‚, é•ã„ +#åè©ž-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. ㊠(æ°´), æŸ (æ°), åŒ (社), æ•… (~æ°), 高 (å“質), ㊠(見事), ã” (ç«‹æ´¾) +#接頭詞-å詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by ãªã‚‹/ãªã•ã‚‹/ãã ã•ã‚‹. +# e.g. ㊠(読ã¿ãªã•ã„), ㊠(座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. ㊠(寒ã„ã§ã™ã­ãˆ), ãƒã‚« (ã§ã‹ã„) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. ç´„, ãŠã‚ˆã, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#å‹•è©ž +# +# verb-main: +#å‹•è©ž-自立 +# +# verb-auxiliary: +#å‹•è©ž-éžè‡ªç«‹ +# +# verb-suffix: +#å‹•è©ž-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-éžè‡ªç«‹ +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. ã‚ã„ã‹ã‚らãš, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«, +# ãª, ã™ã‚‹, ã , etc. +# e.g. ã“ã‚“ãªã«, ãã‚“ãªã«, ã‚ã‚“ãªã«, ãªã«ã‹, ãªã‚“ã§ã‚‚ +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. ã“ã®, ãã®, ã‚ã®, ã©ã®, ã„ã‚ゆる, ãªã‚“らã‹ã®, 何らã‹ã®, ã„ã‚ã‚“ãª, ã“ã†ã„ã†, ãã†ã„ã†, ã‚ã‚ã„ã†, +# ã©ã†ã„ã†, ã“ã‚“ãª, ãã‚“ãª, ã‚ã‚“ãª, ã©ã‚“ãª, 大ããª, å°ã•ãª, ãŠã‹ã—ãª, ã»ã‚“ã®, ãŸã„ã—ãŸ, +# 「(, ã‚‚) ã•ã‚‹ (ã“ã¨ãªãŒã‚‰)ã€, 微々ãŸã‚‹, 堂々ãŸã‚‹, å˜ãªã‚‹, ã„ã‹ãªã‚‹, 我ãŒã€ã€ŒåŒã˜, 亡ã +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. ãŒ, ã‘ã‚Œã©ã‚‚, ãã—ã¦, ã˜ã‚ƒã‚, ãã‚Œã©ã“ã‚ã‹ +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. 
+助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. ã‹ã‚‰, ãŒ, ã§, ã¨, ã«, ã¸, より, ã‚’, ã®, ã«ã¦ +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( ã ) 㨠(è¿°ã¹ãŸ.), ( ã§ã‚ã‚‹) 㨠(ã—ã¦åŸ·è¡ŒçŒ¶äºˆ...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. ã¨ã„ã†, ã¨ã„ã£ãŸ, ã¨ã‹ã„ã†, ã¨ã—ã¦, ã¨ã¨ã‚‚ã«, ã¨å…±ã«, ã§ã‚‚ã£ã¦, ã«ã‚ãŸã£ã¦, ã«å½“ãŸã£ã¦, ã«å½“ã£ã¦, +# ã«ã‚ãŸã‚Š, ã«å½“ãŸã‚Š, ã«å½“ã‚Š, ã«å½“ãŸã‚‹, ã«ã‚ãŸã‚‹, ã«ãŠã„ã¦, ã«æ–¼ã„ã¦,ã«æ–¼ã¦, ã«ãŠã‘ã‚‹, ã«æ–¼ã‘ã‚‹, +# ã«ã‹ã‘, ã«ã‹ã‘ã¦, ã«ã‹ã‚“ã—, ã«é–¢ã—, ã«ã‹ã‚“ã—ã¦, ã«é–¢ã—ã¦, ã«ã‹ã‚“ã™ã‚‹, ã«é–¢ã™ã‚‹, ã«éš›ã—, +# ã«éš›ã—ã¦, ã«ã—ãŸãŒã„, ã«å¾“ã„, ã«å¾“ã†, ã«ã—ãŸãŒã£ã¦, ã«å¾“ã£ã¦, ã«ãŸã„ã—, ã«å¯¾ã—, ã«ãŸã„ã—ã¦, +# ã«å¯¾ã—ã¦, ã«ãŸã„ã™ã‚‹, ã«å¯¾ã™ã‚‹, ã«ã¤ã„ã¦, ã«ã¤ã, ã«ã¤ã‘, ã«ã¤ã‘ã¦, ã«ã¤ã‚Œ, ã«ã¤ã‚Œã¦, ã«ã¨ã£ã¦, +# ã«ã¨ã‚Š, ã«ã¾ã¤ã‚ã‚‹, ã«ã‚ˆã£ã¦, ã«ä¾ã£ã¦, ã«å› ã£ã¦, ã«ã‚ˆã‚Š, ã«ä¾ã‚Š, ã«å› ã‚Š, ã«ã‚ˆã‚‹, ã«ä¾ã‚‹, ã«å› ã‚‹, +# ã«ã‚ãŸã£ã¦, ã«ã‚ãŸã‚‹, ã‚’ã‚‚ã£ã¦, を以ã£ã¦, を通ã˜, を通ã˜ã¦, を通ã—ã¦, ã‚’ã‚ãã£ã¦, ã‚’ã‚ãã‚Š, ã‚’ã‚ãã‚‹, +# ã£ã¦-å£èªž/, ã¡ã‚…ã†-関西å¼ã€Œã¨ã„ã†ã€/, (何) ã¦ã„ㆠ(人)-å£èªž/, ã£ã¦ã„ã†-å£èªž/, ã¨ã„ãµ, ã¨ã‹ã„ãµ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. ã‹ã‚‰, ã‹ã‚‰ã«ã¯, ãŒ, ã‘ã‚Œã©, ã‘ã‚Œã©ã‚‚, ã‘ã©, ã—, ã¤ã¤, ã¦, ã§, ã¨, ã¨ã“ã‚ãŒ, ã©ã“ã‚ã‹, ã¨ã‚‚, ã©ã‚‚, +# ãªãŒã‚‰, ãªã‚Š, ã®ã§, ã®ã«, ã°, ã‚‚ã®ã®, ã‚„ ( ã—ãŸ), ã‚„ã„ãªã‚„, (ã“ã‚ã‚“) ã˜ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, +# (è¡Œã£) ã¡ã‚ƒ(ã„ã‘ãªã„)-å£èªž/, (言ã£) ãŸã£ã¦ (ã—ã‹ãŸãŒãªã„)-å£èªž/, (ãã‚ŒãŒãªã)ã£ãŸã£ã¦ (平気)-å£èªž/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. ã“ã, ã•ãˆ, ã—ã‹, ã™ã‚‰, ã¯, ã‚‚, ãž +助詞-係助詞 +# +# particle-adverbial: +# e.g. 
ãŒã¦ã‚‰, ã‹ã‚‚, ãらã„, ä½, ãらã„, ã—ã‚‚, (学校) ã˜ã‚ƒ(ã“ã‚ŒãŒæµè¡Œã£ã¦ã„ã‚‹)-å£èªž/, +# (ãã‚Œ)ã˜ã‚ƒã‚ (よããªã„)-å£èªž/, ãšã¤, (ç§) ãªãž, ãªã©, (ç§) ãªã‚Š (ã«), (先生) ãªã‚“ã‹ (大嫌ã„)-å£èªž/, +# (ç§) ãªã‚“ãž, (先生) ãªã‚“㦠(大嫌ã„)-å£èªž/, ã®ã¿, ã ã‘, (ç§) ã ã£ã¦-å£èªž/, ã ã«, +# (å½¼)ã£ãŸã‚‰-å£èªž/, (ãŠèŒ¶) ã§ã‚‚ (ã„ã‹ãŒ), ç­‰ (ã¨ã†), (今後) ã¨ã‚‚, ã°ã‹ã‚Š, ã°ã£ã‹-å£èªž/, ã°ã£ã‹ã‚Š-å£èªž/, +# ã»ã©, 程, ã¾ã§, è¿„, (誰) ã‚‚ (ãŒ)([助詞-格助詞] ãŠã‚ˆã³ [助詞-係助詞] ã®å‰ã«ä½ç½®ã™ã‚‹ã€Œã‚‚ã€) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (æ¾å³¶) ã‚„ +助詞-間投助詞 +# +# particle-coordinate: +# e.g. ã¨, ãŸã‚Š, ã ã®, ã ã‚Š, ã¨ã‹, ãªã‚Š, ã‚„, やら +助詞-並立助詞 +# +# particle-final: +# e.g. ã‹ã„, ã‹ã—ら, ã•, ãœ, (ã )ã£ã‘-å£èªž/, (ã¨ã¾ã£ã¦ã‚‹) ã§-方言/, ãª, ナ, ãªã‚-å£èªž/, ãž, ã­, ãƒ, +# ã­ã‡-å£èªž/, ã­ãˆ-å£èªž/, ã­ã‚“-方言/, ã®, ã®ã†-å£èªž/, ã‚„, よ, ヨ, よã‰-å£èªž/, ã‚, ã‚ã„-å£èªž/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A ã‹ B ã‹ã€. Ex:「(国内ã§é‹ç”¨ã™ã‚‹) ã‹,(海外ã§é‹ç”¨ã™ã‚‹) ã‹ (.)〠+# (b) Inside an adverb phrase. Ex:「(幸ã„ã¨ã„ã†) ã‹ (, 死者ã¯ã„ãªã‹ã£ãŸ.)〠+# 「(祈りãŒå±Šã„ãŸã›ã„) ã‹ (, 試験ã«åˆæ ¼ã—ãŸ.)〠+# (c) 「ã‹ã®ã‚ˆã†ã«ã€. Ex:「(何もãªã‹ã£ãŸ) ã‹ (ã®ã‚ˆã†ã«æŒ¯ã‚‹èˆžã£ãŸ.)〠+# e.g. ã‹ +助詞-副助詞ï¼ä¸¦ç«‹åŠ©è©žï¼çµ‚助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. ã«, 㨠+助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. ã‹ãª, ã‘ã‚€, ( ã—ãŸã ã‚ã†) ã«, (ã‚ã‚“ãŸ) ã«ã‚ƒ(ã‚ã‹ã‚‰ã‚“), (俺) ã‚“ (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. 
ãŠã¯ã‚ˆã†, ãŠã¯ã‚ˆã†ã”ã–ã„ã¾ã™, ã“ã‚“ã«ã¡ã¯, ã“ã‚“ã°ã‚“ã¯, ã‚ã‚ŠãŒã¨ã†, ã©ã†ã‚‚ã‚ã‚ŠãŒã¨ã†, ã‚ã‚ŠãŒã¨ã†ã”ã–ã„ã¾ã™, +# ã„ãŸã ãã¾ã™, ã”ã¡ãã†ã•ã¾, ã•ã‚ˆãªã‚‰, ã•ã‚ˆã†ãªã‚‰, ã¯ã„, ã„ã„ãˆ, ã”ã‚ã‚“, ã”ã‚ã‚“ãªã•ã„ +#æ„Ÿå‹•è©ž +# +##### +# symbol: unclassified Symbols. +è¨˜å· +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [â—‹â—Ž@$〒→+] +記å·-一般 +# +# symbol-comma: Commas +# e.g. [,ã€] +記å·-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記å·-å¥ç‚¹ +# +# symbol-space: Full-width whitespace. +記å·-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『ã€] +記å·-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’â€ã€ã€ã€‘] +記å·-括弧閉 +# +# symbol-alphabetic: +#記å·-アルファベット +# +##### +# other: unclassified other +#ãã®ä»– +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (ã )ã‚¡ +ãã®ä»–-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. ã‚ã®, ã†ã‚“ã¨, ãˆã¨ +フィラー +# +##### +# non-verbal: non-verbal sound. +éžè¨€èªžéŸ³ +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt new file mode 100644 index 00000000000..046829db6a2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both Ø£ and ا +من +ومن +منها +منه +ÙÙŠ +ÙˆÙÙŠ +Ùيها +Ùيه +Ùˆ +Ù +ثم +او +أو +ب +بها +به +ا +Ø£ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +Ùما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +Ùان +Ùأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +Ùهى +Ùهي +Ùهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt new file mode 100644 index 00000000000..1ae4ba2ae38 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бÑха +в +Ð²Ð°Ñ +ваш +ваша +вероÑтно +вече +взема +ви +вие +винаги +вÑе +вÑеки +вÑички +вÑичко +вÑÑка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +доÑега +доÑта +е +едва +един +ето +за +зад +заедно +заради +заÑега +затова +защо +защото +и +из +или +им +има +имат +иÑка +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +коÑто +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +Ð¼Ð¾Ð»Ñ +момента +му +н +на +над +назад +най +направи +напред +например +Ð½Ð°Ñ +не +него +Ð½ÐµÑ +ни +ние +никой +нито +но +нÑкои +нÑкой +нÑма +обаче +около +оÑвен +оÑобено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +поÑле +почти +прави +пред +преди +през +при +пък +първо +Ñ +Ñа +Ñамо +Ñе +Ñега +Ñи +Ñкоро +Ñлед +Ñме +Ñпоред +Ñред +Ñрещу +Ñте +Ñъм +ÑÑŠÑ +Ñъщо +Ñ‚ +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +Ñ‚Ñ€Ñбва +тук +тъй +Ñ‚Ñ +Ñ‚ÑÑ… +у +хареÑва +ч +че +чеÑто +чрез +ще +щом +Ñ diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt new file mode 100644 index 00000000000..3da65deafe1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de 
+del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt new file mode 100644 index 00000000000..53c6097dac7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeÅ¡ +budem +byli +jseÅ¡ +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proÄ +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naÅ¡i +napiÅ¡te +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +Äi +pod +téma +mezi +pÅ™es +ty +pak +vám +ani +když +vÅ¡ak +neg +jsem +tento +Älánku +Älánky +aby +jsme +pÅ™ed +pta +jejich +byl +jeÅ¡tÄ› +až +bez +také +pouze +první +vaÅ¡e +která +nás +nový 
+tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +pÅ™i +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpÄ›t +ze +do +pro +je +na +atd +atp +jakmile +pÅ™iÄemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mÄ› +mne +jemu +tomu +tÄ›m +tÄ›mu +nÄ›mu +nÄ›muž +jehož +jíž +jelikož +jež +jakož +naÄež diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt new file mode 100644 index 00000000000..a3ff5fe122c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_da.txt @@ -0,0 +1,108 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/yourself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | infinitive "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | been +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt new file mode 100644 index 00000000000..f7703841887 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_de.txt @@ -0,0 +1,292 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt new file mode 100644 index 00000000000..232681f5bd6 --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'Ï‚' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +Ï€Ïοσ +με +σε +ωσ +παÏα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt new file mode 100644 index 00000000000..2c164c0b2a1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt new file mode 100644 index 00000000000..2db14760075 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_es.txt @@ -0,0 +1,354 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | but +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt new file mode 100644 index 00000000000..25f1db93460 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt 
b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt new file mode 100644 index 00000000000..723641c6da7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ÙŠ' instead of 'ÛŒ' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +ÙˆÚ¯Ùˆ +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +Ùˆ +دو +نخستين +ولي +چرا +Ú†Ù‡ +وسط +Ù‡ +كدام +قابل +يك +رÙت +Ù‡Ùت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرÙته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرÙت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +Ùقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استÙاده +شما +كنار +داريم +ساخته +طور +امده +رÙته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +Ú¯Ùت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختل٠+مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +Ú¯Ùته +Ùكر +بسيار +پيش +براي +روزهاي 
+انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطÙا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +Ùوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt new file mode 100644 index 00000000000..addad798c4b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fi.txt @@ -0,0 +1,95 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | 
self + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt new file mode 100644 index 00000000000..c00837ea939 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_fr.txt @@ -0,0 +1,183 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions 
+fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà  | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt new file mode 100644 index 00000000000..9ff88d747e5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt new file mode 100644 index 00000000000..d8760b12c14 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_gl.txt @@ 
-0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt new file mode 100644 index 00000000000..86286bb083b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. 
+अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इतà¥à¤¯à¤¾à¤¦à¤¿ +इन +इनका +इनà¥à¤¹à¥€à¤‚ +इनà¥à¤¹à¥‡à¤‚ +इनà¥à¤¹à¥‹à¤‚ +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उनà¥à¤¹à¥€à¤‚ +उनà¥à¤¹à¥‡à¤‚ +उनà¥à¤¹à¥‹à¤‚ +उस +उसके +उसी +उसे +à¤à¤• +à¤à¤µà¤‚ +à¤à¤¸ +à¤à¤¸à¥‡ +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किनà¥à¤¹à¥‡à¤‚ +किनà¥à¤¹à¥‹à¤‚ +किया +किर +किस +किसी +किसे +की +कà¥à¤› +कà¥à¤² +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाठ+जा +जितना +जिन +जिनà¥à¤¹à¥‡à¤‚ +जिनà¥à¤¹à¥‹à¤‚ +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिनà¥à¤¹à¥‡à¤‚ +तिनà¥à¤¹à¥‹à¤‚ +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दà¥à¤¸à¤°à¤¾ +दूसरे +दो +दà¥à¤µà¤¾à¤°à¤¾ +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहà¥à¤¤ +बाद +बाला +बिलकà¥à¤² +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाठ+यही +या +यिह +ये +रखें +रहा +रहे +ऱà¥à¤µà¤¾à¤¸à¤¾ +लिठ+लिये +लेकिन +व +वरà¥à¤— +वह +वह +वहाठ+वहीं +वाले +वà¥à¤¹ +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबà¥à¤¤ +साभ +सारा +से +सो +ही +हà¥à¤† +हà¥à¤ˆ +हà¥à¤ +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +à¤à¤¸à¥‡ +रवासा +कोन +निचे +काफि +उसि +पà¥à¤°à¤¾ +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हà¥à¤‡ +कोनसा +इसकि +दà¥à¤¸à¤°à¥‡ +जहां +अप +किंहों +उनकि +भि +वरग +हà¥à¤… +जेसा +नहिं diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt new file mode 100644 index 00000000000..1a96f1db6f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hu.txt @@ -0,0 +1,209 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under 
the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elÅ‘ +elÅ‘ször +elÅ‘tt +elsÅ‘ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +Å‘ +Å‘k +Å‘ket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt new file mode 100644 index 00000000000..60c1c50fbc8 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 
@@ +# example set of Armenian stopwords. +Õ¡ÕµÕ¤ +Õ¡ÕµÕ¬ +Õ¡ÕµÕ¶ +Õ¡ÕµÕ½ +Õ¤Õ¸Ö‚ +Õ¤Õ¸Ö‚Ö„ +Õ¥Õ´ +Õ¥Õ¶ +Õ¥Õ¶Ö„ +Õ¥Õ½ +Õ¥Ö„ +Õ§ +Õ§Õ« +Õ§Õ«Õ¶ +Õ§Õ«Õ¶Ö„ +Õ§Õ«Ö€ +Õ§Õ«Ö„ +Õ§Ö€ +Õ¨Õ½Õ¿ +Õ© +Õ« +Õ«Õ¶ +Õ«Õ½Õ¯ +Õ«Ö€ +Õ¯Õ¡Õ´ +Õ°Õ¡Õ´Õ¡Ö€ +Õ°Õ¥Õ¿ +Õ°Õ¥Õ¿Õ¸ +Õ´Õ¥Õ¶Ö„ +Õ´Õ¥Õ» +Õ´Õ« +Õ¶ +Õ¶Õ¡ +Õ¶Õ¡Ö‡ +Õ¶Ö€Õ¡ +Õ¶Ö€Õ¡Õ¶Ö„ +Õ¸Ö€ +Õ¸Ö€Õ¨ +Õ¸Ö€Õ¸Õ¶Ö„ +Õ¸Ö€ÕºÕ¥Õ½ +Õ¸Ö‚ +Õ¸Ö‚Õ´ +ÕºÕ«Õ¿Õ« +Õ¾Ö€Õ¡ +Ö‡ diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt new file mode 100644 index 00000000000..4617f83a5c5 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah 
+ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git 
a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt new file mode 100644 index 00000000000..4cb5b0891b1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_it.txt @@ -0,0 +1,301 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them 
+le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo 
+stessero +stando diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt new file mode 100644 index 00000000000..d4321be6b16 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +ã® +ã« +㯠+ã‚’ +㟠+㌠+㧠+㦠+㨠+ã— +ã‚Œ +ã• +ã‚ã‚‹ +ã„ã‚‹ +ã‚‚ +ã™ã‚‹ +ã‹ã‚‰ +㪠+ã“㨠+ã¨ã—㦠+ã„ +ã‚„ +れる +ãªã© +ãªã£ +ãªã„ +ã“ã® +ãŸã‚ +ãã® +ã‚㣠+よㆠ+ã¾ãŸ +ã‚‚ã® +ã¨ã„ㆠ+ã‚ã‚Š +ã¾ã§ +られ +ãªã‚‹ +㸠+ã‹ +ã  +ã“ã‚Œ +ã«ã‚ˆã£ã¦ +ã«ã‚ˆã‚Š +ãŠã‚Š +より +ã«ã‚ˆã‚‹ +ãš +ãªã‚Š +られる +ã«ãŠã„㦠+ã° +ãªã‹ã£ +ãªã +ã—ã‹ã— +ã«ã¤ã„㦠+ã› +ã ã£ +ãã®å¾Œ +ã§ãã‚‹ +ãã‚Œ +ㆠ+ã®ã§ +ãªãŠ +ã®ã¿ +ã§ã +ã +㤠+ã«ãŠã‘ã‚‹ +ãŠã‚ˆã³ +ã„ㆠ+ã•ã‚‰ã« +ã§ã‚‚ +ら +ãŸã‚Š +ãã®ä»– +ã«é–¢ã™ã‚‹ +ãŸã¡ +ã¾ã™ +ã‚“ +ãªã‚‰ +ã«å¯¾ã—㦠+特㫠+ã›ã‚‹ +åŠã³ +ã“れら +ã¨ã +ã§ã¯ +ã«ã¦ +ã»ã‹ +ãªãŒã‚‰ +ã†ã¡ +ãã—㦠+ã¨ã¨ã‚‚ã« +ãŸã ã— +ã‹ã¤ã¦ +ãã‚Œãžã‚Œ +ã¾ãŸã¯ +㊠+ã»ã© +ã‚‚ã®ã® +ã«å¯¾ã™ã‚‹ +ã»ã¨ã‚“ã© +ã¨å…±ã« +ã¨ã„ã£ãŸ +ã§ã™ +ã¨ã‚‚ +ã¨ã“ã‚ +ã“ã“ +##### End of file diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt new file mode 100644 index 00000000000..e21a23c06c3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakÅ¡ +Ärpus +augÅ¡pus +bez +caur +dēļ +gar +iekÅ¡ +iz +kopÅ¡ +labad +lejpus +lÄ«dz +no +otrpus +pa +par +pÄr +pÄ“c +pie +pirms +pret +priekÅ¡ +starp +Å¡aipus +uz +viņpus +virs +virspus +zem +apakÅ¡pus +# Conjunctions +un +bet +jo +ja +ka +lai +tomÄ“r +tikko +turpretÄ« +arÄ« +kaut +gan +tÄdēļ +tÄ +ne +tikvien +vien +kÄ +ir +te +vai +kamÄ“r +# Particles +ar +diezin +droÅ¡i +diemžēl +nebÅ«t +ik +it +taÄu +nu +pat +tiklab +iekÅ¡pus +nedz +tik +nevis +turpretim +jeb +iekam +iekÄm +iekÄms +kolÄ«dz +lÄ«dzko +tiklÄ«dz +jebÅ¡u +tÄlab +tÄpÄ“c +nekÄ +itin +jÄ +jau +jel +nÄ“ +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +bÅ«t +biju +biji +bija +bijÄm +bijÄt +esmu +esi +esam +esat +bÅ«Å¡u +bÅ«si +bÅ«s 
+bÅ«sim +bÅ«siet +tikt +tiku +tiki +tika +tikÄm +tikÄt +tieku +tiec +tiek +tiekam +tiekat +tikÅ¡u +tiks +tiksim +tiksiet +tapt +tapi +tapÄt +topat +tapÅ¡u +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvÄm +kļuvÄt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varÄ“t +varÄ“ju +varÄ“jÄm +varÄ“Å¡u +varÄ“sim +var +varÄ“ji +varÄ“jÄt +varÄ“si +varÄ“siet +varat +varÄ“ja +varÄ“s diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt new file mode 100644 index 00000000000..f4d61f5092c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_nl.txt @@ -0,0 +1,117 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. 
of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. 
of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt new file mode 100644 index 00000000000..e76f36e69ed --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_no.txt @@ -0,0 +1,192 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. 
+ + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmÃ¥l dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +pÃ¥ | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +sÃ¥ | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nÃ¥ | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +nÃ¥r | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +Ã¥ | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sÃ¥nn | such a +inni | inside/within +mellom | between +vÃ¥r | our +hver | each +hvem | who +vors | us/ours +hvis | whose +bÃ¥de | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +ogsÃ¥ | also +slik | just +vært | been +være | to be +bÃ¥e | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs 
* +deim | them * +di | your (fem.) * +dÃ¥ | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjÃ¥ | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt new file mode 100644 index 00000000000..276c1b446f2 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_pt.txt @@ -0,0 +1,251 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt new file mode 100644 index 00000000000..4fdee90a5ba --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceÅŸti +aceÅŸtia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aÅŸ +aÅŸadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aÅ£i +au +avea +avem +aveÅ£i +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deÅŸi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eÅŸti +eu +face +fără +fi +fie +fiecare +fii +fim +fiÅ£i +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulÅ£i +ne +nicăieri +nici +nimeni +niÅŸte +noastră +noastre +noi +noÅŸtri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +ÅŸi +sînt +sîntem +sînteÅ£i +spre +sub +sunt +suntem +sunteÅ£i +ta +tăi +tale +tău +te +Å£i +Å£ie +tine +toată +toate +tot +toÅ£i +totuÅŸi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voÅŸtri +vostru +vouă +vreo +vreun diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt new file mode 100644 index 00000000000..64307693457 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_ru.txt @@ -0,0 +1,241 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `Ñ‘' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +Ñ | i +Ñ | from +Ñо | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +вÑе | all +она | she +так | so, thus +его | him +но | but +да | yes/and +Ñ‚Ñ‹ | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +Ð¼ÐµÐ½Ñ | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +еÑли | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +Ð²Ð°Ñ | you accusative +нибудь | indef. 
suffix preceded by hyphen +опÑÑ‚ÑŒ | again +уж | already, but homonym of `adder' +вам | to you +Ñказал | he said +ведь | particle `after all' +там | there +потом | then +ÑÐµÐ±Ñ | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +еÑÑ‚ÑŒ | there is/are +надо | got to, must +ней | prepositional form of ей +Ð´Ð»Ñ | for +мы | we +Ñ‚ÐµÐ±Ñ | thee +их | them, their +чем | than +была | she was +Ñам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +Ñебе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +Ñтот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +Ñтого | genitive form of `this' +какой | which +ÑовÑем | altogether +ним | prepositional form of `его', `они' +здеÑÑŒ | here +Ñтом | prepositional form of `Ñтот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажетÑÑ | it seems +ÑÐµÐ¹Ñ‡Ð°Ñ | now +были | they were +куда | where to +зачем | why +Ñказать | to say +вÑех | all (acc., gen. preposn. plural) +никогда | never +ÑÐµÐ³Ð¾Ð´Ð½Ñ | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +поÑле | after +над | above +больше | more +тот | that one (masc.) +через | across, in +Ñти | these +Ð½Ð°Ñ | us +про | about +вÑего | in all, only, of all +них | prepositional form of `они' (they) +ÐºÐ°ÐºÐ°Ñ | which, feminine +много | lots +разве | interrogative particle +Ñказала | she said +три | three +Ñту | this, acc. fem. sing. +Ð¼Ð¾Ñ | my, feminine +впрочем | moreover, besides +хорошо | good +Ñвою | ones own, acc. fem. sing. +Ñтой | oblique form of `Ñта', fem. 
`this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +Ð½ÐµÐ»ÑŒÐ·Ñ | one must not +такой | such a one +им | to them +более | more +вÑегда | always +конечно | of course +вÑÑŽ | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мною] + | Ñ‚Ñ‹ Ñ‚ÐµÐ±Ñ Ñ‚ÐµÐ±Ðµ тобой [тобою] + | он его ему им [него, нему, ним] + | она ее Ñи ею [нее, нÑи, нею] + | оно его ему им [него, нему, ним] + | + | мы Ð½Ð°Ñ Ð½Ð°Ð¼ нами + | вы Ð²Ð°Ñ Ð²Ð°Ð¼ вами + | они их им ими [них, ним, ними] + | + | ÑÐµÐ±Ñ Ñебе Ñобой [Ñобою] + | + | demonstrative pronouns: Ñтот (this), тот (that) + | + | Ñтот Ñта Ñто Ñти + | Ñтого ÑÑ‚Ñ‹ Ñто Ñти + | Ñтого Ñтой Ñтого Ñтих + | Ñтому Ñтой Ñтому Ñтим + | Ñтим Ñтой Ñтим [Ñтою] Ñтими + | Ñтом Ñтой Ñтом Ñтих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) веÑÑŒ (all) + | + | веÑÑŒ вÑÑ Ð²Ñе вÑе + | вÑего вÑÑŽ вÑе вÑе + | вÑего вÑей вÑего вÑех + | вÑему вÑей вÑему вÑем + | вÑем вÑей вÑем [вÑею] вÑеми + | вÑем вÑей вÑем вÑех + | + | (b) Ñам (himself etc) + | + | Ñам Ñама Ñамо Ñами + | Ñамого Ñаму Ñамо Ñамих + | Ñамого Ñамой Ñамого Ñамих + | Ñамому Ñамой Ñамому Ñамим + | Ñамим Ñамой Ñамим [Ñамою] Ñамими + | Ñамом Ñамой Ñамом Ñамих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв еÑÑ‚ÑŒ Ñуть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | Ð½ÐµÐ»ÑŒÐ·Ñ + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt new file mode 100644 index 00000000000..22bddfd8cb3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_sv.txt @@ -0,0 +1,131 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | 
you, your +sÃ¥dan | such a +vÃ¥r | our +blivit | from bli +dess | its +inom | within +mellan | between +sÃ¥dant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sÃ¥dana | such a +vart | each +dina | thy +vars | whose +vÃ¥rt | our +vÃ¥ra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt new file mode 100644 index 00000000000..07f0fabe692 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +à¹à¸«à¹ˆà¸‡ +à¹à¸¥à¹‰à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸šà¸š +à¹à¸•à¹ˆ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นà¸à¸²à¸£ +เป็น +เปิดเผย +เปิด +เนื่องจาภ+เดียวà¸à¸±à¸™ +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีภ+อาจ +อะไร +ออภ+อย่าง +อยู่ +อยาภ+หาภ+หลาย +หลังจาภ+หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สà¹à¸²à¸«à¸£à¸±à¸š +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาภ+มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นà¹à¸² +นั้น +นัภ+นอà¸à¸ˆà¸²à¸ +ทุภ+ที่สุด +ที่ +ทà¹à¸²à¹ƒà¸«à¹‰ +ทà¹à¸² +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูภ+ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งà¹à¸•à¹ˆ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาภ+จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +à¸à¹ˆà¸­à¸™ +à¸à¹‡ +à¸à¸²à¸£ +à¸à¸±à¸š +à¸à¸±à¸™ +à¸à¸§à¹ˆà¸² +à¸à¸¥à¹ˆà¸²à¸§ diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt new file mode 100644 index 00000000000..84d9408d4ea --- /dev/null +++ 
b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beÅŸ +bile +bin +bir +birçok +biri +birkaç +birkez +birÅŸey +birÅŸeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +deÄŸil +diÄŸer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eÄŸer +elli +en +etmesi +etti +ettiÄŸi +ettiÄŸini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +iÅŸte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduÄŸu +olduÄŸunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +raÄŸmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +ÅŸey +ÅŸeyden +ÅŸeyi +ÅŸeyler +şöyle +ÅŸu +ÅŸuna +ÅŸunda +ÅŸundan +ÅŸunları +ÅŸunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiÅŸ +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt new file 
mode 100644 index 00000000000..6f0368e4d81 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number of tokens and readings must match exactly. +# +# Also notice that the behavior of multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt new file mode 100644 index 00000000000..1dfc0abecbf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/schema.xml b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/schema.xml new file mode 100644 index 00000000000..83080dfa40c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/schema.xml @@ -0,0 +1,914 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + iddiff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml new file mode 100644 index 00000000000..9d9178746cf --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/solrconfig.xml @@ -0,0 +1,1764 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 
20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + textSpell + + + + + + default + name + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true 
+ + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt new file mode 100644 index 00000000000..ae1e83eeb3d --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt new file mode 100644 index 00000000000..7f72128303b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcelltest/collection1/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/contrib/solr-mr/src/test-files/solr/solrcloud/conf/solrconfig.xml b/solr/contrib/solr-mr/src/test-files/solr/solrcloud/conf/solrconfig.xml new file mode 100644 index 00000000000..a37ab12ecfe --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/solr/solrcloud/conf/solrconfig.xml @@ -0,0 +1,1787 @@ + + + + + + + + + LUCENE_43 + + + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + ${solr.hdfs.home:} + ${solr.hdfs.confdir:} + ${solr.hdfs.security.kerberos.enabled:false} + ${solr.hdfs.security.kerberos.keytabfile:} + ${solr.hdfs.security.kerberos.principal:} + ${solr.hdfs.blockcache.enabled:true} + ${solr.hdfs.blockcache.slab.count:1} + ${solr.hdfs.blockcache.direct.memory.allocation:true} + ${solr.hdfs.blockcache.blocksperbank:16384} + ${solr.hdfs.blockcache.read.enabled:true} + ${solr.hdfs.blockcache.write.enabled:true} + ${solr.hdfs.nrtcachingdirectory.enable:true} + ${solr.hdfs.nrtcachingdirectory.maxmergesizemb:16} + ${solr.hdfs.nrtcachingdirectory.maxcachedmb:192} + + + + + + + + + + + + + ${solr.maxIndexingThreads:8} + + + + + + 128 + + + + + + + + + + + + + ${solr.lock.type:hdfs} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:60000} + false + + + + + ${solr.autoSoftCommit.maxTime:1000} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 4 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + + + + + + + + explicit + json + true + text + + + + + + + + true + json + true + + + + + + + + explicit + + + velocity + browse + layout + Solritas + + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 
resourcename^1.0 + + text + 100% + *:* + 10 + *,score + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0 + + text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename + 3 + + + on + cat + manu_exact + content_type + author_s + ipod + GB + 1 + cat,inStock + after + price + 0 + 600 + 50 + popularity + 0 + 10 + 3 + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + on + content features title name + html + <b> + </b> + 0 + title + 0 + name + 3 + 200 + content + 750 + + + on + false + 5 + 2 + 5 + true + true + 5 + 3 + + + + + spellcheck + + + + + + + + + + + + + + application/json + + + + + application/csv + + + + + + + + + + + + + + + + + + + + + solrpingquery + + + all + + + + + + + + + explicit + true + + + + + + + + + + + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + text + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + text + true + + + tvComponent + + + + + + + + + default + + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + + 20 + + + clustering/carrot2 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + text + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? 
+ + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + *:* + + + diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/NullHeader.docx b/solr/contrib/solr-mr/src/test-files/test-documents/NullHeader.docx new file mode 100644 index 00000000000..cc62b8d6beb Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/NullHeader.docx differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/boilerplate.html b/solr/contrib/solr-mr/src/test-files/test-documents/boilerplate.html new file mode 100644 index 00000000000..0286578693c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/boilerplate.html @@ -0,0 +1,58 @@ + + + + + + + + Title + + + + + + + +
+ + + + + +
boilerplatetext
+
+ +

This is the real meat of the page, +and represents the text we want. +It has lots of juicy content. + +We assume that it won't get filtered out. +And that all of the lines will be in the +output. +

+ +

+Here's another paragraph of text. +This is the end of the text. +

+ +

footer

+ + + diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/complex.mbox b/solr/contrib/solr-mr/src/test-files/test-documents/complex.mbox new file mode 100644 index 00000000000..27f7017d265 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/complex.mbox @@ -0,0 +1,291 @@ +From core-user-return-14700-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 04:28:28 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 19921 invoked from network); 1 Jun 2009 04:28:28 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 04:28:28 -0000 +Received: (qmail 84995 invoked by uid 500); 1 Jun 2009 04:28:38 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 84895 invoked by uid 500); 1 Jun 2009 04:28:38 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 84885 invoked by uid 99); 1 Jun 2009 04:28:38 -0000 +Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:38 +0000 +X-ASF-Spam-Status: No, hits=1.2 required=10.0 + tests=SPF_NEUTRAL +X-Spam-Check-By: apache.org +Received-SPF: neutral (athena.apache.org: local policy) +Received: from [69.147.107.21] (HELO mrout2-b.corp.re1.wahoo.com) (69.147.107.21) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 04:28:26 +0000 +Received: from SNV-EXPF01.ds.corp.wahoo.com (snv-expf01.ds.corp.wahoo.com [207.126.227.250]) + by mrout2-b.corp.re1.wahoo.com (8.13.8/8.13.8/y.out) with ESMTP id n514QYA6099963 + for ; Sun, 31 May 2009 21:26:35 -0700 (PDT) +DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns; + 
h=received:user-agent:date:subject:from:to:message-id: + thread-topic:thread-index:in-reply-to:mime-version:content-type: + content-transfer-encoding:x-originalarrivaltime; + b=YVtSNdgjeeSBS1yY3XDolul49i+HrgNG7QszMo9LzGnrwejjgsl5+iUM6EiQgEpV +Received: from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXPF01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959); + Sun, 31 May 2009 21:26:34 -0700 +Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ; + Mon, 1 Jun 2009 04:26:33 +0000 +User-Agent: Microsoft-Entourage/12.17.0.090302 +Date: Mon, 01 Jun 2009 09:56:31 +0530 +Subject: Re: question about when shuffle/sort start working +From: Sam Judgement +To: +Message-ID: +Thread-Topic: question about when shuffle/sort start working +Thread-Index: AcnicSNoBw19cMU8UEaXwAdZ1YYhuw== +In-Reply-To: <440622.41041.qm@web111005.mail.gq1.wahoo.com> +Mime-version: 1.0 +Content-type: text/plain; + charset="US-ASCII" +Content-transfer-encoding: 7bit +X-OriginalArrivalTime: 01 Jun 2009 04:26:34.0501 (UTC) FILETIME=[257EAB50:01C9E271] +X-Virus-Checked: Checked by ClamAV on apache.org + +When a Mapper completes, MapCompletionEvents are generated. Reducers try to +fetch map outputs for a given map only on the receipt of such events. + +Sam + + +On 5/30/09 10:00 AM, "Jianmin Foo" wrote: + +> Hi, +> I am being confused by the protocol between mapper and reducer. When mapper +> emitting the (key,value) pair done, is there any signal the mapper send out to +> hadoop framework in protocol to indicate that map is done and the shuffle/sort +> can begin for reducer? If there is no this signal in protocol, when the +> framework begin the shuffle/sort? 
+> +> Thanks, +> Jianmin +> +> +> +> + + +From core-user-return-14701-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 05:31:14 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 38243 invoked from network); 1 Jun 2009 05:31:14 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 05:31:14 -0000 +Received: (qmail 15621 invoked by uid 500); 1 Jun 2009 05:31:24 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 15557 invoked by uid 500); 1 Jun 2009 05:31:24 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 15547 invoked by uid 99); 1 Jun 2009 05:31:24 -0000 +Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 05:31:24 +0000 +X-ASF-Spam-Status: No, hits=2.2 required=10.0 + tests=HTML_MESSAGE,SPF_PASS +X-Spam-Check-By: apache.org +Received-SPF: pass (nike.apache.org: local policy) +Received: from [68.142.237.94] (HELO n9.bullet.re3.wahoo.com) (68.142.237.94) + by apache.org (qpsmtpd/0.29) with SMTP; Mon, 01 Jun 2009 05:31:11 +0000 +Received: from [68.142.237.88] by n9.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:50 -0000 +Received: from [67.195.9.82] by t4.bullet.re3.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000 +Received: from [67.195.9.99] by t2.bullet.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:30:49 -0000 +Received: from [127.0.0.1] by omp103.mail.gq1.wahoo.com with NNFMP; 01 Jun 2009 05:28:01 -0000 +X-wahoo-Newman-Property: ymail-3 +X-wahoo-Newman-Id: 796121.97519.bm@omp103.mail.gq1.wahoo.com +Received: (qmail 35264 invoked by uid 60001); 1 Jun 2009 05:30:49 -0000 +DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=wahoo.com; s=s1024; t=1243834249; bh=R8qzdi/IbLyO8UwpnaujDpT9E+6bJ7nkmZN2803EmRk=; h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; b=vq4c6RIDbkuLPYd8mirusIXf6DqTb/IeT55In7W00Y5Sxx1ZiXBb78yE9+TDfXJ0elsEZvqv4ocyvolGE0eGtyYeJA0mZikpRNu6pidxPNpCplOcLHBRz7YQ7iERwv3TagRlWy2Xd3oD9ZeV0A05P7WUOiNNX1PUUJD1IVdrEZo= +DomainKey-Signature:a=rsa-sha1; q=dns; c=nofws; + s=s1024; d=wahoo.com; + h=Message-ID:X-YMail-OSG:Received:X-Mailer:References:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type; + b=6HXZV98ON5vBwmE/xS8stVD0D2F4dkMY7a0suX5KVTb736JdR8G59mqBq/dWcpbFTLiCLtxi18LMb/dU1RKRGOEdn3l3j/jKXhBrhIgfg3qtNskPedXDKBvn7JGXiSkqpA/tUtPjvc0Uuk8/LaA01SQTz40Engg7nD8/EJdIAhA=; +Message-ID: <592088.35091.qm@web111010.mail.gq1.wahoo.com> +X-YMail-OSG: KzhhrJYVM1m.MCS6vRpRP2ZZO2PrfnbngosELDCIa91ZqvhJph4RdmzfUW0jw9W04RCSch1K730bPohwNpNBIk2QR_zt4_mfbhfq7YEPkSoz9LSXG90P9vIo5Fc8qyZN0U6vA9gtdyGQTpN5ahvillUH9nAF0TMWv2SvZJLjPlQ0Z0p8oK8ltBwGTgLrM8Jtdn9D29yoRyi3_EpVOfdD9OP.EK50Vr1XwSUYMbnpZ0WGHMwd.Yig7A6Elwadm3YVbfOdx2mfrG.jQsUAxQjRBNvbrOM57.FaE11kHTe9aoBWSeihNg-- +Received: from [216.145.54.7] by web111010.mail.gq1.wahoo.com via HTTP; Sun, 31 May 2009 22:30:49 PDT +X-Mailer: wahooMailRC/1277.43 wahooMailWebService/0.7.289.10 +References: +Date: Sun, 31 May 2009 22:30:49 -0700 (PDT) +From: Jianmin Foo +Subject: Re: question about when shuffle/sort start working +To: core-user@hadoop.apache.org +In-Reply-To: +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="0-1193839393-1243834249=:35091" +X-Virus-Checked: Checked by ClamAV on apache.org + +--0-1193839393-1243834249=:35091 +Content-Type: text/plain; charset=us-ascii + +Thanks a lot for your explanation, Sam. + +So is this event generated by hadoop framework? Is there any API in mapper to fire this event? 
Actually, I am thinking to implement a mapper that will emit some pairs, then fire this event to let the reducer works, the same mapper task then emit some other pairs and repeat. Do you think is this logic feasible by current API? + +Thanks, +Jianmin + + + + + +________________________________ +From: Sam Judgement +To: core-user@hadoop.apache.org +Sent: Monday, June 1, 2009 12:26:31 PM +Subject: Re: question about when shuffle/sort start working + +When a Mapper completes, MapCompletionEvents are generated. Reducers try to +fetch map outputs for a given map only on the receipt of such events. + +Sam + + +On 5/30/09 10:00 AM, "Jianmin Foo" wrote: + +> Hi, +> I am being confused by the protocol between mapper and reducer. When mapper +> emitting the (key,value) pair done, is there any signal the mapper send out to +> hadoop framework in protocol to indicate that map is done and the shuffle/sort +> can begin for reducer? If there is no this signal in protocol, when the +> framework begin the shuffle/sort? 
+> +> Thanks, +> Jianmin +> +> +> +> + + + +--0-1193839393-1243834249=:35091-- + + +From core-user-return-14702-apmail-hadoop-core-user-archive=hadoop.apache.org@hadoop.apache.org Mon Jun 01 06:04:30 2009 +Return-Path: +Delivered-To: apmail-hadoop-core-user-archive@www.apache.org +Received: (qmail 53387 invoked from network); 1 Jun 2009 06:04:29 -0000 +Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) + by minotaur.apache.org with SMTP; 1 Jun 2009 06:04:29 -0000 +Received: (qmail 39066 invoked by uid 500); 1 Jun 2009 06:04:39 -0000 +Delivered-To: apmail-hadoop-core-user-archive@hadoop.apache.org +Received: (qmail 38970 invoked by uid 500); 1 Jun 2009 06:04:39 -0000 +Mailing-List: contact core-user-help@hadoop.apache.org; run by ezmlm +Precedence: bulk +List-Help: +List-Unsubscribe: +List-Post: +List-Id: +Reply-To: core-user@hadoop.apache.org +Delivered-To: mailing list core-user@hadoop.apache.org +Received: (qmail 38955 invoked by uid 99); 1 Jun 2009 06:04:39 -0000 +Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:39 +0000 +X-ASF-Spam-Status: No, hits=1.2 required=10.0 + tests=SPF_NEUTRAL +X-Spam-Check-By: apache.org +Received-SPF: neutral (athena.apache.org: local policy) +Received: from [216.145.54.172] (HELO mrout2.wahoo.com) (216.145.54.172) + by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2009 06:04:28 +0000 +Received: from SNV-EXBH01.ds.corp.wahoo.com (snv-exbh01.ds.corp.wahoo.com [207.126.227.249]) + by mrout2.wahoo.com (8.13.6/8.13.6/y.out) with ESMTP id n5163FGq038852 + for ; Sun, 31 May 2009 23:03:15 -0700 (PDT) +DomainKey-Signature: a=rsa-sha1; s=serpent; d=wahoo-inc.com; c=nofws; q=dns; + h=received:user-agent:date:subject:from:to:message-id: + thread-topic:thread-index:in-reply-to:mime-version:content-type: + content-transfer-encoding:x-originalarrivaltime; + b=rChE4SCnwtWaZpjhovkiXDKfDiVNdRRvsadSGG9S9bgvOexn/9/5JjEQx1pOR7Nb +Received: 
from SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.9]) by SNV-EXBH01.ds.corp.wahoo.com with Microsoft SMTPSVC(6.0.3790.3959); + Sun, 31 May 2009 23:03:15 -0700 +Received: from 10.66.92.213 ([10.66.92.213]) by SNV-EXVS08.ds.corp.wahoo.com ([207.126.227.58]) with Microsoft Exchange Server HTTP-DAV ; + Mon, 1 Jun 2009 06:03:15 +0000 +User-Agent: Microsoft-Entourage/12.17.0.090302 +Date: Mon, 01 Jun 2009 11:33:13 +0530 +Subject: Re: question about when shuffle/sort start working +From: Sam Judgement +To: +Message-ID: +Thread-Topic: question about when shuffle/sort start working +Thread-Index: AcnifqWrLG6N7GAk7kqy9QalVWfegQ== +In-Reply-To: <592088.35091.qm@web111010.mail.gq1.wahoo.com> +Mime-version: 1.0 +Content-type: text/plain; + charset="US-ASCII" +Content-transfer-encoding: 7bit +X-OriginalArrivalTime: 01 Jun 2009 06:03:15.0462 (UTC) FILETIME=[A7231260:01C9E27E] +X-Virus-Checked: Checked by ClamAV on apache.org + + +No you cannot raise this event yourself, this event is generated internally +by the framework. + +I am guessing that what you probably want is to have a chain of MapReduce +Jobs where the output of one is automatically fed as input to another. You +can look at these classes: JobControl and ChainMapper/ChainReducer. + +Sam + +On 6/1/09 11:00 AM, "Jianmin Foo" wrote: + +> Thanks a lot for your explanation, Sam. +> +> So is this event generated by hadoop framework? Is there any API in mapper to +> fire this event? Actually, I am thinking to implement a mapper that will emit +> some pairs, then fire this event to let the reducer works, the +> same mapper task then emit some other pairs and repeat. Do you +> think is this logic feasible by current API? +> +> Thanks, +> Jianmin +> +> +> +> +> +> ________________________________ +> From: Sam Judgement +> To: core-user@hadoop.apache.org +> Sent: Monday, June 1, 2009 12:26:31 PM +> Subject: Re: question about when shuffle/sort start working +> +> When a Mapper completes, MapCompletionEvents are generated. 
Reducers try to +> fetch map outputs for a given map only on the receipt of such events. +> +> Sam +> +> +> On 5/30/09 10:00 AM, "Jianmin Foo" wrote: +> +>> Hi, +>> I am being confused by the protocol between mapper and reducer. When mapper +>> emitting the (key,value) pair done, is there any signal the mapper send out +>> to +>> hadoop framework in protocol to indicate that map is done and the +>> shuffle/sort +>> can begin for reducer? If there is no this signal in protocol, when the +>> framework begin the shuffle/sort? +>> +>> Thanks, +>> Jianmin +>> +>> +>> +>> +> +> +> + + diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/rsstest.rss b/solr/contrib/solr-mr/src/test-files/test-documents/rsstest.rss new file mode 100644 index 00000000000..758f6a18363 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/rsstest.rss @@ -0,0 +1,36 @@ + + + + + TestChannel + http://test.channel.com/ + Sample RSS File for Junit test + en-us + + + Home Page of Chris Mattmann + http://www-scf.usc.edu/~mattmann/ + Chris Mattmann's home page + + + Awesome Open Source Search Engine + http://www.nutch.org/ + Yup, that's what it is + + + diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120521-100919.avro b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120521-100919.avro new file mode 100644 index 00000000000..36f01a2d48c Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120521-100919.avro differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433 b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433 new file mode 100644 index 00000000000..e633a1f71f1 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433 @@ -0,0 +1,4 @@ +1000 +{"text":"sample tweet 
one","retweet_count":0,"in_reply_to_user_id":null,"retweeted":false,"truncated":false,"source":"href=\"http:\/\/sample.com\"","id_str":"1234567891","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"in_reply_to_status_id":null,"place":null,"in_reply_to_status_id_str":null,"coordinates":null,"created_at":"Wed Sep 05 01:01:01 +0000 1985","in_reply_to_screen_name":null,"favorited":false,"in_reply_to_user_id_str":null,"user":{"default_profile_image":false,"friends_count":111,"profile_background_color":"3C0C29","location":"Palo Alto","is_translator":false,"profile_background_tile":true,"favourites_count":11,"verified":false,"profile_sidebar_fill_color":"efefef","follow_request_sent":null,"contributors_enabled":false,"description":"desc1","profile_sidebar_border_color":"eeeeee","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1\/normal.jpg","id_str":"1111111","listed_count":1,"lang":"en","screen_name":"fake_user1","show_all_inline_media":false,"profile_use_background_image":true,"profile_image_url":"http:\/\/a0.twimg.com\/profile_images\/1111111\/normal.jpg","default_profile":false,"statuses_count":11111,"created_at":"Thu Apr 07 11:04:54 +0000 1985","profile_text_color":"333333","followers_count":111,"protected":false,"following":null,"notifications":null,"profile_background_image_url":"http:\/\/a0.twimg.com\/images\/themes\/theme1\/bg.gif","time_zone":null,"url":null,"name":"name1","geo_enabled":false,"profile_link_color":"009999","id":1111112,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/images\/themes\/theme1\/bg.gif","utc_offset":null},"id":11111112,"contributors":null,"geo":null} +2000 +{"text":"sample tweet two","retweet_count":0,"in_reply_to_user_id":null,"retweeted":false,"truncated":false,"source":"href=\"http:\/\/sample.com\"","id_str":"2345678902","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"in_reply_to_status_id":null,"place":null,"in_reply_to_status_id_str":null,"coordinates":null,"created_at":"Wed Sep 05 
02:14:34 +0000 1985","in_reply_to_screen_name":null,"favorited":false,"in_reply_to_user_id_str":null,"user":{"default_profile_image":false,"friends_count":222,"profile_background_color":"3C0C29","location":"San Francisco","is_translator":false,"profile_background_tile":false,"favourites_count":22,"verified":false,"profile_sidebar_fill_color":"B2D948","follow_request_sent":null,"contributors_enabled":false,"description":"desc2","profile_sidebar_border_color":"8EC63D","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/22222222\/image_normal.jpg","id_str":"2222222","listed_count":0,"lang":"en","screen_name":"fake_user2","show_all_inline_media":false,"profile_use_background_image":true,"profile_image_url":"http:\/\/a0.twimg.com\/profile_images\/2222222\/image_normal.jpg","default_profile":false,"statuses_count":222222,"created_at":"Thu Aug 04 11:33:28 +0000 1985","profile_text_color":"444444","followers_count":222,"protected":false,"following":null,"notifications":null,"profile_background_image_url":"http:\/\/a0.twimg.com\/profile_background_images\/222222\/222222.jpg","time_zone":"Central Time (US & Canada)","url":null,"name":"name2","geo_enabled":false,"profile_link_color":"9A0057","id":2222223,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/2222222\/22222.jpg","utc_offset":-21600},"id":222223,"contributors":null,"geo":null} \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro new file mode 100644 index 00000000000..900507c6f05 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433-medium.avro differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.avro 
b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.avro new file mode 100644 index 00000000000..4dbf180dc1d Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.avro differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 new file mode 100644 index 00000000000..a4a91594ce8 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.bz2 differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.gz b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.gz new file mode 100644 index 00000000000..3e7a44cb588 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/sample-statuses-20120906-141433.gz differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/test-outlook.msg b/solr/contrib/solr-mr/src/test-files/test-documents/test-outlook.msg new file mode 100644 index 00000000000..c975c0c69d4 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/test-outlook.msg differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testAIFF.aif b/solr/contrib/solr-mr/src/test-files/test-documents/testAIFF.aif new file mode 100644 index 00000000000..97eac1d8e3d Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testAIFF.aif differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testBMP.bmp b/solr/contrib/solr-mr/src/test-files/test-documents/testBMP.bmp new file mode 100644 index 00000000000..c0176157039 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testBMP.bmp differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testBMPfp.txt 
b/solr/contrib/solr-mr/src/test-files/test-documents/testBMPfp.txt new file mode 100644 index 00000000000..1da2966d451 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testBMPfp.txt @@ -0,0 +1,3 @@ +BMW to Make Hybrid Sports Car
 + By CHRISTOPH RAUWALD . +LEIPZIG, Germany—German car maker BMW AG said Friday it will start series production of a new plug-in hybrid sports car in 2013, to be based on the Vision EfficientDynamics Concept car shown at the Frankfurt auto show in September last year. Chief Executive Norbert Reithofer said the car will be produced in Germany but didn't provide details on the price. The BMW Vision EfficientDynamics Concept car is a sporty plug-in, full hybrid with a turbo-diesel engine, four seats and upward-pivoting doors. BMW executive board member Klaus Draeger told reporters he expects to achieve "a significant sales volume" with the new high-performance sports car. Asked whether annual sales could exceed 1,000 vehicles, Mr. Draeger said, "You said this and I'm not saying this is wrong." In March, Mr. Reithofer indicated that the concept car was set to make it into series production. "I like the car. And you know what it means when I say I like the car—it means I will drive it. It's not just a concept car," he told analysts during a presentation in Munich. The car will be designed for sale in all major global markets, which according to Mr. Draeger might require offering a gasoline engine instead of the prototype's three-cylinder diesel engine. Diesel cars account for roughly half of the European market, but are significantly less popular in the U.S. and hardly present at all in China. Mr. Draeger declined to comment on the vehicle's price tag, but noted that in order to achieve substantial sales volumes the price mustn't be too high. He said the same goes for BMW's planned Megacity Vehicle. A price tag of €60,000 ($85,242) or more would certainly limit potential sales volumes, he said. 
diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testEMLX.emlx b/solr/contrib/solr-mr/src/test-files/test-documents/testEMLX.emlx new file mode 100644 index 00000000000..66766e10be3 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testEMLX.emlx @@ -0,0 +1,72 @@ + + +1795 +From: "Julien Nioche (JIRA)" +To: dev@tika.apache.org +Subject: [jira] Commented: (TIKA-461) RFC822 messages not parsed +Reply-To: dev@tika.apache.org +Delivered-To: mailing list dev@tika.apache.org +Date: Mon, 6 Sep 2010 05:25:34 -0400 (EDT) +In-Reply-To: <6089099.260231278600349994.JavaMail.jira@thor> +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 7bit +X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 +X-Virus-Checked: Checked by ClamAV on apache.org + + + [ https://issues.apache.org/jira/browse/TIKA-461?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12906468#action_12906468 ] + +Julien Nioche commented on TIKA-461: +------------------------------------ + +I'll have a look at mime4j and try to use it in Tika + +> RFC822 messages not parsed +> -------------------------- +> +> Key: TIKA-461 +> URL: https://issues.apache.org/jira/browse/TIKA-461 +> Project: Tika +> Issue Type: Bug +> Components: parser +> Affects Versions: 0.7 +> Reporter: Joshua Turner +> Assignee: Julien Nioche +> +> Presented with an RFC822 message exported from Thunderbird, AutodetectParser produces an empty body, and a Metadata containing only one key-value pair: "Content-Type=message/rfc822". Directly calling MboxParser likewise gives an empty body, but with two metadata pairs: "Content-Encoding=us-ascii Content-Type=application/mbox". +> A quick peek at the source of MboxParser shows that the implementation is pretty naive. If the wiring can be sorted out, something like Apache James' mime4j might be a better bet. + +-- +This message is automatically generated by JIRA. 
+- +You can reply to this email to add a comment to the issue online. + + + + + + flags + 0 + sender + "Julien Nioche (JIRA)" <jira@apache.org> + subject + [jira] Commented: (TIKA-461) RFC822 messages not parsed + to + dev@tika.apache.org + diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xls b/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xls new file mode 100644 index 00000000000..86b291606d0 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xls differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xlsx b/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xlsx new file mode 100644 index 00000000000..8d5169f8410 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testEXCEL.xlsx differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testFLAC.flac b/solr/contrib/solr-mr/src/test-files/test-documents/testFLAC.flac new file mode 100644 index 00000000000..ccec94717a4 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testFLAC.flac differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testFLV.flv b/solr/contrib/solr-mr/src/test-files/test-documents/testFLV.flv new file mode 100644 index 00000000000..d35e9bb6063 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testFLV.flv differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg new file mode 100644 index 00000000000..1b93e771832 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.gz b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.gz new file mode 100644 index 00000000000..2ee8e9c1b59 Binary files /dev/null and 
b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.gz differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz new file mode 100644 index 00000000000..3f35102eaef Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testJPEG_EXIF.jpg.tar.gz differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testMP3i18n.mp3 b/solr/contrib/solr-mr/src/test-files/test-documents/testMP3i18n.mp3 new file mode 100644 index 00000000000..0f253704ebb Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testMP3i18n.mp3 differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testMP4.m4a b/solr/contrib/solr-mr/src/test-files/test-documents/testMP4.m4a new file mode 100644 index 00000000000..a9bc7312702 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testMP4.m4a differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPDF.pdf b/solr/contrib/solr-mr/src/test-files/test-documents/testPDF.pdf new file mode 100644 index 00000000000..1f1bcff6fe9 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPDF.pdf differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPNG.png b/solr/contrib/solr-mr/src/test-files/test-documents/testPNG.png new file mode 100644 index 00000000000..afbcb5f7388 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPNG.png differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.ppt b/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.ppt new file mode 100644 index 00000000000..75829de08d7 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.ppt differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.pptx 
b/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.pptx new file mode 100644 index 00000000000..92c2744dc4e Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPPT_various.pptx differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPSD.psd b/solr/contrib/solr-mr/src/test-files/test-documents/testPSD.psd new file mode 100644 index 00000000000..7cedbc21a7a Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPSD.psd differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testPages.pages b/solr/contrib/solr-mr/src/test-files/test-documents/testPages.pages new file mode 100644 index 00000000000..9fe1e401297 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testPages.pages differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testRTFVarious.rtf b/solr/contrib/solr-mr/src/test-files/test-documents/testRTFVarious.rtf new file mode 100644 index 00000000000..57fadb99988 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testRTFVarious.rtf @@ -0,0 +1,329 @@ +{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff31507\deff0\stshfdbch31506\stshfloch31506\stshfhich31506\stshfbi31507\deflang1033\deflangfe1033\themelang1033\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} +{\f2\fbidi \fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\fbidi \froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f10\fbidi \fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;} +{\f11\fbidi \fmodern\fcharset128\fprq1{\*\panose 02020609040205080304}MS Mincho{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f15\fbidi \fmodern\fcharset128\fprq1{\*\panose 020b0609070205080204}MS Gothic{\*\falt MS Mincho};} +{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 
02040503050406030204}Cambria Math;}{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\f38\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Tahoma;} +{\f175\fbidi \fmodern\fcharset128\fprq1{\*\panose 02020609040205080304}@MS Mincho;}{\f209\fbidi \fmodern\fcharset128\fprq1{\*\panose 00000000000000000000}@MS Gothic;} +{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f210\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\f211\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\f213\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f214\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f215\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\f216\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f217\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f218\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f220\fbidi \fswiss\fcharset238\fprq2 Arial CE;} +{\f221\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;}{\f223\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f224\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f225\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);} +{\f226\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);}{\f227\fbidi \fswiss\fcharset186\fprq2 Arial 
Baltic;}{\f228\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f230\fbidi \fmodern\fcharset238\fprq1 Courier New CE;} +{\f231\fbidi \fmodern\fcharset204\fprq1 Courier New Cyr;}{\f233\fbidi \fmodern\fcharset161\fprq1 Courier New Greek;}{\f234\fbidi \fmodern\fcharset162\fprq1 Courier New Tur;}{\f235\fbidi \fmodern\fcharset177\fprq1 Courier New (Hebrew);} +{\f236\fbidi \fmodern\fcharset178\fprq1 Courier New (Arabic);}{\f237\fbidi \fmodern\fcharset186\fprq1 Courier New Baltic;}{\f238\fbidi \fmodern\fcharset163\fprq1 Courier New (Vietnamese);} +{\f322\fbidi \fmodern\fcharset0\fprq1 MS Mincho Western{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f320\fbidi \fmodern\fcharset238\fprq1 MS Mincho CE{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};} +{\f321\fbidi \fmodern\fcharset204\fprq1 MS Mincho Cyr{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f323\fbidi \fmodern\fcharset161\fprq1 MS Mincho Greek{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};} +{\f324\fbidi \fmodern\fcharset162\fprq1 MS Mincho Tur{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f327\fbidi \fmodern\fcharset186\fprq1 MS Mincho Baltic{\*\falt \'82\'6c\'82\'72 \'96\'be\'92\'a9};}{\f550\fbidi \froman\fcharset238\fprq2 Cambria Math CE;} +{\f551\fbidi \froman\fcharset204\fprq2 Cambria Math Cyr;}{\f553\fbidi \froman\fcharset161\fprq2 Cambria Math Greek;}{\f554\fbidi \froman\fcharset162\fprq2 Cambria Math Tur;}{\f557\fbidi \froman\fcharset186\fprq2 Cambria Math Baltic;} +{\f580\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\f581\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\f583\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f584\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;} +{\f587\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f590\fbidi \fswiss\fcharset238\fprq2 Tahoma CE;}{\f591\fbidi \fswiss\fcharset204\fprq2 Tahoma Cyr;}{\f593\fbidi \fswiss\fcharset161\fprq2 Tahoma Greek;} +{\f594\fbidi \fswiss\fcharset162\fprq2 Tahoma Tur;}{\f595\fbidi \fswiss\fcharset177\fprq2 Tahoma (Hebrew);}{\f596\fbidi 
\fswiss\fcharset178\fprq2 Tahoma (Arabic);}{\f597\fbidi \fswiss\fcharset186\fprq2 Tahoma Baltic;} +{\f598\fbidi \fswiss\fcharset163\fprq2 Tahoma (Vietnamese);}{\f599\fbidi \fswiss\fcharset222\fprq2 Tahoma (Thai);}{\f1962\fbidi \fmodern\fcharset0\fprq1 @MS Mincho Western;}{\f1960\fbidi \fmodern\fcharset238\fprq1 @MS Mincho CE;} +{\f1961\fbidi \fmodern\fcharset204\fprq1 @MS Mincho Cyr;}{\f1963\fbidi \fmodern\fcharset161\fprq1 @MS Mincho Greek;}{\f1964\fbidi \fmodern\fcharset162\fprq1 @MS Mincho Tur;}{\f1967\fbidi \fmodern\fcharset186\fprq1 @MS Mincho Baltic;} +{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;}{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;} +{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 
Cambria Tur;}{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;} +{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri 
Cyr;}{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;} +{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}} +{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0; +\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\chyperlink\ctint255\cshade255\red0\green0\blue255;\caccentone\ctint255\cshade255\red79\green129\blue189;}{\*\defchp \f31506\fs22 } +{\*\defpap \ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 +\rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 \styrsid16456967 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\* 
+\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused \sqformat Normal Table;}{ +\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext15 \slink16 \sunhideused \styrsid4535536 header;}{\*\cs16 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink15 \slocked \styrsid4535536 Header Char;}{\s17\ql \li0\ri0\widctlpar +\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext17 \slink18 \sunhideused \styrsid4535536 footer;}{\*\cs18 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink17 \slocked \styrsid4535536 Footer Char;}{ +\s19\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af38\afs16\alang1025 \ltrch\fcs0 \f38\fs16\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext19 \slink20 \ssemihidden \sunhideused \styrsid4535536 Balloon Text;}{\*\cs20 \additive \rtlch\fcs1 \af38\afs16 \ltrch\fcs0 \f38\fs16 \sbasedon10 \slink19 \slocked \ssemihidden \styrsid4535536 Balloon Text Char;}{\*\cs21 \additive +\rtlch\fcs1 \af0 \ltrch\fcs0 \ul\cf17 \sbasedon10 \sunhideused \styrsid4535536 Hyperlink;}{\*\cs22 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \cf15 \sbasedon10 \ssemihidden \styrsid4535536 Placeholder Text;}{ +\s23\ql 
\li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs20\alang1025 \ltrch\fcs0 \f31506\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext23 \slink24 \ssemihidden \sunhideused \styrsid10829135 footnote text;}{\*\cs24 \additive \rtlch\fcs1 \af0\afs20 \ltrch\fcs0 \fs20 \sbasedon10 \slink23 \slocked \ssemihidden \styrsid10829135 Footnote Text Char;}{\*\cs25 \additive +\rtlch\fcs1 \af0 \ltrch\fcs0 \super \sbasedon10 \ssemihidden \sunhideused \styrsid10829135 footnote reference;}{\*\ts26\tsrowd\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv +\brdrs\brdrw10 \trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblind0\tblindtype3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon11 \snext26 \spriority59 \styrsid8288896 +Table Grid;}{\s27\ql \li720\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin720\itap0\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext27 \sqformat \spriority34 \styrsid10055055 List Paragraph;}{\s28\ql \li0\ri0\sa200\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \ab\af31507\afs18\alang1025 \ltrch\fcs0 +\b\f31506\fs18\cf18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \sunhideused \sqformat \spriority35 \styrsid11105546 caption;}}{\*\listtable{\list\listtemplateid1249008552\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0 +\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 
?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative +\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0 +{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li6480\lin6480 }{\listname 
;}\listid73432867}{\list\listtemplateid1071396652\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0 +{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698693\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698689\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext +\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698689 +\'01\u-3913 ?;}{\levelnumbers;}\f3\fbias0\hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698691 +\'01o;}{\levelnumbers;}\f2\fbias0\hres0\chhres0 \fi-360\li5760\lin5760 
}{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360\levelindent0{\leveltext\leveltemplateid67698693 +\'01\u-3929 ?;}{\levelnumbers;}\f10\fbias0\hres0\chhres0 \fi-360\li6480\lin6480 }{\listname ;}\listid169494399}{\list\listtemplateid-487930464\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698705\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 
+\levelindent0{\leveltext\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace360 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \hres0\chhres0 \fi-180\li6480\lin6480 }{\listname ;}\listid1132862691}}{\*\listoverridetable{\listoverride\listid169494399\listoverridecount0\ls1} +{\listoverride\listid73432867\listoverridecount0\ls2}{\listoverride\listid1132862691\listoverridecount0\ls3}}{\*\rsidtbl \rsid724479\rsid2255182\rsid2767955\rsid4260063\rsid4535536\rsid5051464\rsid5706211\rsid5843828\rsid7218132\rsid8152053\rsid8288896 +\rsid9897893\rsid9969477\rsid10055055\rsid10249050\rsid10829135\rsid11105546\rsid12662658\rsid12941695\rsid13331334\rsid14163426\rsid14225018\rsid14292078\rsid14556934\rsid16456967\rsid16539678}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0 +\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\subject Subject is here}{\author Michael McCandless}{\keywords Keyword1 Keyword2}{\operator Michael McCandless}{\creatim\yr2011\mo8\dy29\hr5\min20} +{\revtim\yr2011\mo8\dy30\hr6\min13}{\version30}{\edmins445}{\nofpages2}{\nofwords95}{\nofchars546}{\nofcharsws640}{\vern32771}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}} +\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0\ltrsect +\widowctrl\ftnbj\aenddoc\trackmoves1\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen 
+\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1440\dgvorigin1440\dghshow1\dgvshow1 +\jexpand\viewkind1\viewscale150\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct +\asianbrkrule\rsidroot4535536\newtblstyruls\nogrowautofit\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal \nouicompat \fet0 +{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsep +\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsepc +\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsep +\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4535536 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 \chftnsepc +\par }}\ltrpar \sectd 
\ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\headerr \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 +\rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 This is the header text}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12662658 .}{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid4535536 +\par +\par }}{\footerr \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 This is the footer text. +\par +\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}} +{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8 +\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 +\lang1024\langfe1024\noproof\langfenp1028\insrsid4535536 {\shp{\*\shpinst\shpleft4866\shptop1990\shpright8593\shpbottom2658\shpfhdr0\shpbxcolumn\shpbxignore\shpbypara\shpbyignore\shpwr3\shpwrk0\shpfblwtxt0\shpz0\shplid1026 
+{\sp{\sn shapeType}{\sv 202}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn lTxid}{\sv 65536}}{\sp{\sn hspNext}{\sv 1026}}{\sp{\sn fFitShapeToText}{\sv 1}}{\sp{\sn dhgt}{\sv 251660288}}{\sp{\sn pctHoriz}{\sv 400}}{\sp{\sn pctVert}{\sv 200}} +{\sp{\sn sizerelh}{\sv 0}}{\sp{\sn sizerelv}{\sv 0}}{\sp{\sn fLayoutInCell}{\sv 1}}{\shptxt \ltrpar \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 +\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Here is a text box +\par }}}{\shprslt{\*\do\dobxcolumn\dobypara\dodhgt8192\dptxbx\dptxlrtb{\dptxbxtext\ltrpar \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 +\ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Here is a text box +\par }}\dpx4866\dpy1990\dpxsize3727\dpysize668\dpfillfgcr255\dpfillfgcg255\dpfillfgcb255\dpfillbgcr255\dpfillbgcg255\dpfillbgcb255\dpfillpat1\dplinew15\dplinecor0\dplinecog0\dplinecob0}}}}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Footnote appears here} +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs25\super\insrsid10829135 \chftn {\footnote \ltrpar \pard\plain \ltrpar\s23\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs20\alang1025 \ltrch\fcs0 +\f31506\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs25\super\insrsid10829135 \chftn }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10829135 This is a footnote.}}}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid14292078 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \b\insrsid14556934\charrsid14556934 Bold}{\rtlch\fcs1 \af31507 \ltrch\fcs0 
\insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \i\insrsid14556934\charrsid14556934 italic}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \ul\insrsid14556934\charrsid14556934 underline}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \super\insrsid14556934\charrsid14556934 superscript}{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid14556934 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \sub\insrsid14556934\charrsid14556934 subscript}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14556934 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid10055055 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10055055 Here is a list: +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}}\pard\plain \ltrpar\s27\ql \fi-360\li720\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\ls2\adjustright\rin0\lin720\itap0\pararsid10055055\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 +\ltrch\fcs0 \insrsid10055055 Bullet 1 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}Bullet 2 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f3\fs22\insrsid10055055 \loch\af3\dbch\af31506\hich\f3 \'b7\tab}Bullet 3 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid10055055 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid10055055 Here is a numbered list: +\par {\listtext\pard\plain\ltrpar 
\s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 1)\tab}}\pard\plain \ltrpar\s27\ql \fi-360\li720\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\ls3\adjustright\rin0\lin720\itap0\pararsid10055055\contextualspace \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 +\ltrch\fcs0 \insrsid10055055 Number bullet 1 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 2)\tab}Number bullet 2 +\par {\listtext\pard\plain\ltrpar \s27 \rtlch\fcs1 \af31507\afs22 \ltrch\fcs0 \f31506\fs22\insrsid10055055 \hich\af31506\dbch\af31506\loch\f31506 3)\tab}Number bullet 3 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid10829135 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536\charrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 Keyword1 Keyword2}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 }{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid15481255 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 +\par }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 HYPERLINK "http://tika.apache.org" }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536 {\*\datafield +00d0c9ea79f9bace118c8200aa004ba90b0200000003000000e0c9ea79f9bace118c8200aa004ba90b4800000068007400740070003a002f002f00740069006b0061002e006100700061006300680065002e006f00720067002f000000795881f43b1d7f48af2c825dc485276300000000a5ab0000}}}{\fldrslt { +\rtlch\fcs1 \af31507 \ltrch\fcs0 \cs21\ul\cf17\insrsid4535536\charrsid4535536 This is a 
hyperlink}}}\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 +\par +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4535536\charrsid4535536 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 Subject is here}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid14292078 }{\rtlch\fcs1 +\af31507 \ltrch\fcs0 \insrsid4535536 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +\par \ltrrow}\trowd \irow0\irowband0\ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 +\trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 +\clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt +\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\pard\plain \ltrpar +\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\yts26 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +Row 1 Col 1\cell Row 1 Col 2\cell Row 1 Col 3\cell }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 \trowd 
\irow0\irowband0\ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr +\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt +\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\row \ltrrow}\pard\plain \ltrpar +\ql \li0\ri0\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\yts26 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +Row 2 Col 1\cell Row 2 Col 2\cell Row 2 Col 3\cell }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\intbl\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 +\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 \trowd \irow1\irowband1\lastrow \ltrrow\ts26\trgaph108\trleft-108\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr +\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10 \trftsWidth1\trftsWidthB3\trautofit1\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tblrsid8288896\tbllkhdrrows\tbllkhdrcols\tbllknocolband\tblind0\tblindtype3 \clvertalt\clbrdrt +\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 
\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx3084\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 +\cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx6276\clvertalt\clbrdrt\brdrs\brdrw10 \clbrdrl\brdrs\brdrw10 \clbrdrb\brdrs\brdrw10 \clbrdrr\brdrs\brdrw10 \cltxlrtb\clftsWidth3\clwWidth3192\clshdrawnil \cellx9468\row }\pard \ltrpar +\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8288896 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid724479 Suddenly some }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5706211 J}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid724479 apanese text:}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af11 \ltrch\fcs0 \loch\af11\hich\af11\dbch\af11\insrsid724479\charrsid724479 \loch\af11\hich\af11\dbch\f11 \uc2\u12478\'83\'5d\u12523\'83\'8b\u12466\'83\'51\u12392\'82\'c6\u23614\'94\'f6\u23822\'8d\'e8\u12289\'81\'41\u28129\'92\'57\u12293 +\'81\'58\u12392\'82\'c6\u26368\'8d\'c5\u26399\'8a\'fa}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af15 \ltrch\fcs0 \lang1033\langfe1041\loch\af15\hich\af15\dbch\af15\langfenp1041\insrsid5843828 \loch\af15\hich\af15\dbch\f15 \uc2\u-248\'81\'69\u-217\'82\'66\u-216\'82\'67\u-207\'82\'70\u-247\'81\'6a}{\rtlch\fcs1 \af31507 \ltrch\fcs0 +\insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5706211 And then some Gothic text: +\par }\pard \ltrpar\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid14163426 {\rtlch\fcs1 \af1\afs20 \ltrch\fcs0 \f1\fs20\insrsid14163426 \u-10240\'3f\u-8398\'3f\u-10240\'3f\u-8385\'3f\u-10240\'3f\u-8380\'3f\u-10240\'3f\u-8391\'3f\u-10240 +\'3f\u-8381\'3f\u-10240\'3f\u-8390\'3f}{\rtlch\fcs1 \af1\afs20 \ltrch\fcs0 \f1\fs20\insrsid14163426 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid7218132 Here is a citation:}{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 CITATION Kra \\l 1033 }}{\fldrslt {\rtlch\fcs1 \af31507 \ltrch\fcs0 \lang1024\langfe1024\noproof\insrsid12941695 (Kramer)}}} +\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12941695 }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid9969477 +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 +\par }\pard\plain \ltrpar\s28\ql \li0\ri0\sa200\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid11105546 \rtlch\fcs1 \ab\af31507\afs18\alang1025 \ltrch\fcs0 \b\f31506\fs18\cf18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 Figure }{\field{\*\fldinst {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 SEQ Figure \\* ARABIC }}{\fldrslt {\rtlch\fcs1 \af31507 \ltrch\fcs0 \lang1024\langfe1024\noproof\insrsid11105546 1}}} +\sectd \ltrsect\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid16456967\sftnbj {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid11105546 This is a caption for Figure 1 +\par }\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 { +\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid8152053 +\par +\par }{\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 \sect }\sectd \ltrsect\sbknone\linex0\cols2\endnhere\sectlinegrid360\sectdefaultcl\sectrsid5051464\sftnbj \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1 +\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid5051464 \rtlch\fcs1 
\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 +Row 1 column 1 +\par Row 2 column 1 +\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464 Row 1 column 2 +\par Row 2 column 2 +\par \sect }\sectd \ltrsect\sbknone\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid5051464\sftnbj \pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid8152053 \rtlch\fcs1 +\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid5051464\charrsid8152053 +\par }{\*\themedata 504b030414000600080000002100828abc13fa0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb6ac3301045f785fe83d0b6d8 +72ba28a5d8cea249777d2cd20f18e4b12d6a8f843409c9df77ecb850ba082d74231062ce997b55ae8fe3a00e1893f354e9555e6885647de3a8abf4fbee29bbd7 +2a3150038327acf409935ed7d757e5ee14302999a654e99e393c18936c8f23a4dc072479697d1c81e51a3b13c07e4087e6b628ee8cf5c4489cf1c4d075f92a0b +44d7a07a83c82f308ac7b0a0f0fbf90c2480980b58abc733615aa2d210c2e02cb04430076a7ee833dfb6ce62e3ed7e14693e8317d8cd0433bf5c60f53fea2fe7 +065bd80facb647e9e25c7fc421fd2ddb526b2e9373fed4bb902e182e97b7b461e6bfad3f010000ffff0300504b030414000600080000002100a5d6a7e7c00000 +00360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4fc7060abb08 +84a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b63095120f88d94fbc +52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462a1a82fe353 
+bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f7468656d652f7468 +656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b4b0d592c9c +070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b4757e8d3f7 +29e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f7468656d65 +312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87615b8116d8 +a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad79482a9c04 +98f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b5d8a314d3c +94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab999fb7b471 +7509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9699640f671 +9e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd5868b37a088d1 +e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d60cf03ac1a5 +193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f9e7ef3f2d1 +17d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be15c308d3f2 +8acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a99793849c26ae6 +6252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d32a423279a 
+668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2af074481847 +bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86e877f0034e +16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb44f95d843b +5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a6409fb44d0 +8741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c3d9058edf2 +c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db02565e85f3b966 +0d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276b9f7dec44b +7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8c33585b5fb +9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e51440ca2e0 +088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95b21be5ceaf +8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff6dce591a26 +ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec69ffb9e65d0 +28d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239b75a5bb1e6 +345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a44959d366ad93 +b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e82db8df9f30 
+254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f74 +68656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f24 +51eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e3198 +720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528 +a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100828abc13fa0000001c0200001300000000000000000000000000 +000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b000000000000000000000000 +002b0100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c00000000000000000000000000140200007468 +656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b000016000000000000000000 +00000000d10200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b010000270000000000 +00000000000000009b0900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000960a00000000} +{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d +617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169 +6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363 +656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e} 
+{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4; +\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9; +\lsdpriority39 \lsdlocked0 toc 1;\lsdpriority39 \lsdlocked0 toc 2;\lsdpriority39 \lsdlocked0 toc 3;\lsdpriority39 \lsdlocked0 toc 4;\lsdpriority39 \lsdlocked0 toc 5;\lsdpriority39 \lsdlocked0 toc 6;\lsdpriority39 \lsdlocked0 toc 7; +\lsdpriority39 \lsdlocked0 toc 8;\lsdpriority39 \lsdlocked0 toc 9;\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdpriority1 \lsdlocked0 Default Paragraph Font; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid;\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 
\lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful 
List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 
3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 3; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdsemihidden0 
\lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdsemihidden0 
\lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference; +\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography;\lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 010500000200000018000000 +4d73786d6c322e534158584d4c5265616465722e352e30000000000000000000000e0000 +d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff0900060000000000000000000000010000000100000000000000001000000200000001000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +fffffffffffffffffdffffff05000000feffffff04000000fefffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffff01000000ec69d9888b8b3d4c859eaf6cd158be0f0000000000000000000000000076 +bb6efd66cc0103000000c0030000000000004d0073006f004400610074006100530074006f0072006500000000000000000000000000000000000000000000000000000000000000000000000000000000001a000101ffffffffffffffff0200000000000000000000000000000000000000000000000076bb6efd66cc01 +0076bb6efd66cc010000000000000000000000003500cb004c0053004a004300ca00d80044005500470056003000cd0045004500d100c3004c00c000cd0051003d003d000000000000000000000000000000000032000101ffffffffffffffff0300000000000000000000000000000000000000000000000076bb6efd66 +cc010076bb6efd66cc010000000000000000000000004900740065006d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a000201ffffffff04000000ffffffff000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000016020000000000000100000002000000030000000400000005000000060000000700000008000000feffffff0a0000000b0000000c0000000d0000000e000000feffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3c623a536f757263657320786d6c6e733a623d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f6772617068792220786d6c6e733d +22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879222053656c65637465645374796c653d225c4150412e58534c22205374796c654e616d653d22415041223e3c623a536f757263653e3c623a546167 +3e4b72613c2f623a5461673e3c623a536f75726365547970653e426f6f6b3c2f623a536f75726365547970653e3c623a477569643e7b32313839323034362d453338412d344136382d383931312d3837313145343731453345347d3c2f623a477569643e3c623a4c4349443e303c2f623a4c4349443e3c623a417574686f 
+723e3c623a417574686f723e3c623a4e616d654c6973743e3c623a506572736f6e3e3c623a4c6173743e4b72616d65723c2f623a4c6173743e3c2f623a506572736f6e3e3c2f623a4e616d654c6973743e3c2f623a417574686f723e3c2f623a417574686f723e3c623a5469746c653e486f7720746f207573652054696b +613c2f623a5469746c653e3c623a5265664f726465723e313c2f623a5265664f726465723e3c2f623a536f757263653e3c2f623a536f75726365733e0d0a68aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e31983c3f786d6c2076657273696f6e3d22312e302220656e +636f64696e673d225554462d3822207374616e64616c6f6e653d226e6f223f3e0d0a3c64733a6461746173746f72654974656d2064733a6974656d49443d227b32344432423237452d423832412d343130442d393536412d4431303443363332453042357d2220786d6c6e733a64733d22687474703a2f2f736368656d61 +732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f637573746f6d586d6c223e3c64733a736368656d61526566733e3c64733a736368656d615265662064733a7572693d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666 +696365446f63756d656e742f323030362f6269626c696f677261706879222f3e3c2f64733a736368656d61526566733e3c2f64733a6461746173746f72654974656d3e68656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027000000000000000000000000009b0900007468656d +652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d500072006f007000650072007400690065007300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000200ffffffffffffffffffff +ffff0000000000000000000000000000000000000000000000000000000000000000000000000900000055010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffff 
+ffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffff +ffffffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffff +ffffffffffffffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000105000000000000}} diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testSVG.svg b/solr/contrib/solr-mr/src/test-files/test-documents/testSVG.svg new file mode 100644 index 00000000000..8a05a4835b6 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testSVG.svg @@ -0,0 +1,23 @@ + + + + + Test SVG image + + \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testTIFF.tif b/solr/contrib/solr-mr/src/test-files/test-documents/testTIFF.tif new file mode 100644 index 00000000000..8f6c7abba42 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testTIFF.tif differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testVISIO.vsd b/solr/contrib/solr-mr/src/test-files/test-documents/testVISIO.vsd new file mode 100644 index 00000000000..d699e11122b Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testVISIO.vsd differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testWAV.wav b/solr/contrib/solr-mr/src/test-files/test-documents/testWAV.wav new file mode 100644 index 00000000000..59a063ece01 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testWAV.wav differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testWORD_various.doc 
b/solr/contrib/solr-mr/src/test-files/test-documents/testWORD_various.doc new file mode 100644 index 00000000000..a2ad2364565 Binary files /dev/null and b/solr/contrib/solr-mr/src/test-files/test-documents/testWORD_various.doc differ diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testXML.xml b/solr/contrib/solr-mr/src/test-files/test-documents/testXML.xml new file mode 100644 index 00000000000..a01a402977b --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testXML.xml @@ -0,0 +1,48 @@ + + + + + Tika test document + + Rida Benjelloun + + Java + + XML + + XSLT + + JDOM + + Indexation + + Framework d'indexation des documents XML, HTML, PDF etc.. + + http://www.apache.org + + 2000-12-01T00:00:00.000Z + + test + + application/msword + + Fr + + Archimède et Lius à Châteauneuf testing chars en été + + \ No newline at end of file diff --git a/solr/contrib/solr-mr/src/test-files/test-documents/testXML2.xml b/solr/contrib/solr-mr/src/test-files/test-documents/testXML2.xml new file mode 100644 index 00000000000..6611ee14957 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-documents/testXML2.xml @@ -0,0 +1,22 @@ + + + + 123 + Hello World + Solr rocks + diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/loadSolrBasic.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/loadSolrBasic.conf new file mode 100644 index 00000000000..b033320b776 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/loadSolrBasic.conf @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# morphline.conf example file +# this is a comment +// this is yet another comment + +# for details see https://github.com/typesafehub/config#optional-system-or-env-variable-overrides +SOLR_COLLECTION : "collection1" +SOLR_COLLECTION : ${?ENV_SOLR_COLLECTION} + +ZK_HOST : "127.0.0.1:2181/solr" +ZK_HOST : ${?ENV_ZK_HOST} + +SOLR_HOME_DIR : "example/solr/collection1" +SOLR_HOME_DIR : ${?ENV_SOLR_HOME_DIR} + +SOLR_LOCATOR : { + collection : ${SOLR_COLLECTION} + zkHost : ${ZK_HOST} + solrHomeDir : ${SOLR_HOME_DIR} + # batchSize : 1000 +} +SOLR_LOCATOR : ${?ENV_SOLR_LOCATOR} + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + boosts : { + id : 1.0 + } + } + } + + { logDebug { format : "output record: {}", args : ["@{}"] } } + ] + } +] diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellDocumentTypes.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellDocumentTypes.conf new file mode 100644 index 00000000000..bf1e58d5fb4 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellDocumentTypes.conf @@ -0,0 +1,255 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). +# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { separateAttachments {} } + + # java command that doesn't do anything except for test compilation + { + java { + imports : "import java.util.*;" + code: """ + List tags = record.get("javaWithImports"); + return child.process(record); + """ + } + } + + # java command that doesn't do anything except for test compilation + { + java { + code: """ + List tags = record.get("javaWithoutImports"); + return child.process(record); + """ + } + } + + { + # used for auto-detection if MIME type isn't explicitly supplied + detectMimeType { + includeDefaultMimeTypes : true + mimeTypesFiles : [RESOURCES_DIR/custom-mimetypes.xml] + } + } + + { + tryRules { + throwExceptionIfAllRulesFailed : true + rules : [ + # next top-level rule: + { + commands : [ + { logDebug { format : "hello unpack" } } + { unpack {} } + { generateUUID {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { logDebug { format : "hello decompress" } } + { decompress {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { + 
readAvroContainer { + supportedMimeTypes : [avro/binary] + # readerSchemaString : "" # optional, avro json schema blurb for getSchema() + # readerSchemaFile : /path/to/syslog.avsc + } + } + + { extractAvroTree {} } + + { + setValues { + id : "@{/id}" + user_screen_name : "@{/user_screen_name}" + text : "@{/text}" + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + { + commands : [ + { + readJsonTestTweets { + supportedMimeTypes : ["mytwittertest/json+delimited+length"] + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + # next top-level rule: + { + commands : [ + { logDebug { format : "hello solrcell" } } + { + # wrap SolrCell around an HTML Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + # captureAttr : true # default is false + capture : [ + + # twitter feed schema + user_friends_count + user_location + user_description + user_statuses_count + user_followers_count + user_name + user_screen_name + created_at + text + retweet_count + retweeted + in_reply_to_user_id + source + in_reply_to_status_id + media_url_https + expanded_url + + # file metadata + file_download_url + file_upload_url + file_scheme + file_host + file_port + file_path + file_name + file_length + file_last_modified + file_owner + file_group + file_permissions_user + file_permissions_group + file_permissions_other + file_permissions_stickybit + ] + + fmap : { content : text, content-type : content_type } # rename "content" field to "text" fields + dateFormats : [ "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat + # xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" + uprefix : "ignored_" + lowernames : true + # solrContentHandlerFactory : org.apache.solr.tika.TrimSolrContentHandlerFactory + + # Tika parsers to be registered. 
If multiple parsers support the same MIME type, + # the parser is chosen that is closest to the bottom in this list: + parsers : [ + { parser : org.apache.tika.parser.asm.ClassParser } + # { parser : org.gagravarr.tika.OggParser, additionalSupportedMimeTypes : [audio/ogg] } + { parser : org.gagravarr.tika.FlacParser } + { parser : org.apache.tika.parser.audio.AudioParser } + { parser : org.apache.tika.parser.audio.MidiParser } + { parser : org.apache.tika.parser.crypto.Pkcs7Parser } + { parser : org.apache.tika.parser.dwg.DWGParser } + { parser : org.apache.tika.parser.epub.EpubParser } + { parser : org.apache.tika.parser.executable.ExecutableParser } + { parser : org.apache.tika.parser.feed.FeedParser } + { parser : org.apache.tika.parser.font.AdobeFontMetricParser } + { parser : org.apache.tika.parser.font.TrueTypeParser } + { parser : org.apache.tika.parser.xml.XMLParser } + { parser : org.apache.tika.parser.html.HtmlParser } + { parser : org.apache.tika.parser.image.ImageParser } + { parser : org.apache.tika.parser.image.PSDParser } + { parser : org.apache.tika.parser.image.TiffParser } + { parser : org.apache.tika.parser.iptc.IptcAnpaParser } + { parser : org.apache.tika.parser.iwork.IWorkPackageParser } + { parser : org.apache.tika.parser.jpeg.JpegParser } + { parser : org.apache.tika.parser.mail.RFC822Parser } + { parser : org.apache.tika.parser.mbox.MboxParser, additionalSupportedMimeTypes : [message/x-emlx] } + { parser : org.apache.tika.parser.microsoft.OfficeParser } + { parser : org.apache.tika.parser.microsoft.TNEFParser } + { parser : org.apache.tika.parser.microsoft.ooxml.OOXMLParser } + { parser : org.apache.tika.parser.mp3.Mp3Parser } + { parser : org.apache.tika.parser.mp4.MP4Parser } + { parser : org.apache.tika.parser.hdf.HDFParser } + { parser : org.apache.tika.parser.netcdf.NetCDFParser } + { parser : org.apache.tika.parser.odf.OpenDocumentParser } + { parser : org.apache.tika.parser.pdf.PDFParser } + { parser : 
org.apache.tika.parser.pkg.CompressorParser } + { parser : org.apache.tika.parser.pkg.PackageParser } + { parser : org.apache.tika.parser.rtf.RTFParser } + { parser : org.apache.tika.parser.txt.TXTParser } + { parser : org.apache.tika.parser.video.FLVParser } + { parser : org.apache.tika.parser.xml.DcXMLParser } + { parser : org.apache.tika.parser.xml.FictionBookParser } + { parser : org.apache.tika.parser.chm.ChmParser } + ] + } + } + + { generateUUID { field : ignored_base_id } } + + { + generateSolrSequenceKey { + baseIdField: ignored_base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + ] + } + ] + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + logDebug { + format : "My output record: {}" + args : ["@{}"] + } + } + + ] + } +] diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellJPGCompressed.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellJPGCompressed.conf new file mode 100644 index 00000000000..e1a9679678e --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellJPGCompressed.conf @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). 
+# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). +# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { separateAttachments {} } + + # java command that doesn't do anything except for test compilation + { + java { + imports : "import java.util.*;" + code: """ + List tags = record.get("javaWithImports"); + return child.process(record); + """ + } + } + + # java command that doesn't do anything except for test compilation + { + java { + code: """ + List tags = record.get("javaWithoutImports"); + return child.process(record); + """ + } + } + + { + # auto-detect MIME type if it isn't explicitly supplied + detectMimeType { + includeDefaultMimeTypes : true + } + } + + { + tryRules { + throwExceptionIfAllRulesFailed : true + rules : [ + # next top-level rule: + { + commands : [ + { logDebug { format : "hello unpack" } } + { unpack {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { logDebug { format : "hello decompress" } } + { decompress {} } + { callParentPipe {} } + ] + } + + # next top-level rule: + { + commands : [ + { logDebug { format : "hello solrcell" } } + { + # wrap SolrCell around a JPG Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + captureAttr : true # default is false + capture : [content, a, h1, h2] # extract some fields + fmap : { exif_image_height : text, a : anchor, h1 : heading1 } # rename some fields + dateFormats : [ "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat + xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" + uprefix : "ignored_" + lowernames : true + solrContentHandlerFactory : 
org.apache.solr.morphlines.cell.TrimSolrContentHandlerFactory + parsers : [ # nested Tika parsers + { parser : org.apache.tika.parser.jpeg.JpegParser } + ] + } + } + + { logDebug { format : "solrcell output: {}", args : ["@{}"] } } + ] + } + ] + } + } + + { generateUUID { field : ignored_base_id } } + + { + generateSolrSequenceKey { + baseIdField: ignored_base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + logDebug { + format : "My output record: {}" + args : ["@{}"] + } + } + + ] + } +] diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellXML.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellXML.conf new file mode 100644 index 00000000000..6c19c5ee692 --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/solrCellXML.conf @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
+# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + addValues { _attachment_mimetype : application/xml } + # alternatively, consider using the detectMimeType command + } + + { + # wrap SolrCell around an XML Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + parsers : [ # nested Tika parsers + { parser : org.apache.tika.parser.xml.XMLParser } + ] + } + } + + { + generateSolrSequenceKey { + baseIdField: base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + + { logDebug { format : "solrcell output: {}", args : ["@{}"] } } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + ] + } +] diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/tokenizeText.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/tokenizeText.conf new file mode 100644 index 00000000000..c58d4d2236c --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/tokenizeText.conf @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + tokenizeText { + inputField : message + outputField : tokens + solrFieldType : text_en + solrLocator : ${SOLR_LOCATOR} + } + } + + { logDebug { format : "output record {}", args : ["@{}"] } } + ] + } +] diff --git a/solr/contrib/solr-mr/src/test-files/test-morphlines/tutorialReadAvroContainer.conf b/solr/contrib/solr-mr/src/test-files/test-morphlines/tutorialReadAvroContainer.conf new file mode 100644 index 00000000000..cf34c4fac7e --- /dev/null +++ b/solr/contrib/solr-mr/src/test-files/test-morphlines/tutorialReadAvroContainer.conf @@ -0,0 +1,140 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
+# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment + +# Specify server locations in a SOLR_LOCATOR variable; used later in variable substitutions: +SOLR_LOCATOR : { + # Name of solr collection + collection : collection1 + + # ZooKeeper ensemble + zkHost : "127.0.0.1:2181/solr" + + # The maximum number of documents to send to Solr per network batch (throughput knob) + # batchSize : 1000 +} + +# Specify an array of one or more morphlines, each of which defines an ETL +# transformation chain. A morphline consists of one or more (potentially +# nested) commands. A morphline is a way to consume records (e.g. Flume events, +# HDFS files or blocks), turn them into a stream of records, and pipe the stream +# of records through a set of easily configurable transformations on its way to +# Solr. +morphlines : [ + { + # Name used to identify a morphline. E.g. used if there are multiple morphlines in a + # morphline config file + id : morphline1 + + # Import all morphline commands in these java packages and their subpackages. + # Other commands that may be present on the classpath are not visible to this morphline. + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { + # Parse Avro container file and emit a record for each avro object + readAvroContainer { + # Optionally, require the input record to match one of these MIME types: + # supportedMimeTypes : [avro/binary] + + # Optionally, use a custom Avro schema in JSON format inline: + # readerSchemaString : """""" + + # Optionally, use a custom Avro schema file in JSON format: + # readerSchemaFile : /path/to/syslog.avsc + } + } + + { + # Consume the output record of the previous command and pipe another record downstream. + # + # extractAvroPaths is a command that uses zero or more avro path expressions to extract + # values from an Avro object. 
Each expression consists of a record output field name (on + # the left side of the colon ':') as well as zero or more path steps (on the right hand + # side), each path step separated by a '/' slash. Avro arrays are traversed with the '[]' + # notation. + # + # The result of a path expression is a list of objects, each of which is added to the + # given record output field. + # + # The path language supports all Avro concepts, including nested structures, records, + # arrays, maps, unions, etc, as well as a flatten option that collects the primitives in + # a subtree into a flat list. + extractAvroPaths { + flatten : false + paths : { + id : /id + text : /text + user_friends_count : /user_friends_count + user_location : /user_location + user_description : /user_description + user_statuses_count : /user_statuses_count + user_followers_count : /user_followers_count + user_name : /user_name + user_screen_name : /user_screen_name + created_at : /created_at + retweet_count : /retweet_count + retweeted : /retweeted + in_reply_to_user_id : /in_reply_to_user_id + source : /source + in_reply_to_status_id : /in_reply_to_status_id + media_url_https : /media_url_https + expanded_url : /expanded_url + } + } + } + + # Consume the output record of the previous command and pipe another record downstream. + # + # convert timestamp field to native Solr timestamp format + # e.g. 2012-09-06T07:14:34Z to 2012-09-06T07:14:34.000Z + { + convertTimestamp { + field : created_at + inputFormats : ["yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd"] + inputTimezone : UTC +# outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSSZ" + outputTimezone : America/Los_Angeles + } + } + + # Consume the output record of the previous command and pipe another record downstream. + # + # This command sanitizes record fields that are unknown to Solr schema.xml by deleting + # them. Recall that Solr throws an exception on any attempt to load a document that + # contains a field that isn't specified in schema.xml. 
+ { + sanitizeUnknownSolrFields { + # Location from which to fetch Solr schema + solrLocator : ${SOLR_LOCATOR} + } + } + + # log the record at DEBUG level to SLF4J + { logDebug { format : "output record: {}", args : ["@{}"] } } + + # load the record into a Solr server or MapReduce Reducer. + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } +] diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityMapper.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityMapper.java new file mode 100644 index 00000000000..370dee189c9 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityMapper.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Pass-through mapper: logs each input pair at INFO level and emits the input
+ * value as the output key, paired with the {@link NullWritable} singleton.
+ * The input key is only logged, never emitted.
+ */
+// The type arguments are required: against the raw Mapper type,
+// map(LongWritable, Text, Context) would not override Mapper.map and the
+// @Override annotation would be rejected by the compiler.
+public class IdentityMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(IdentityMapper.class);
+
+  /**
+   * Logs the pair and writes {@code (value, NullWritable)} to the context.
+   *
+   * @param key input key; logged only
+   * @param value input value; becomes the output key
+   * @param context task context that receives the output pair
+   * @throws IOException if the context fails to write the pair
+   * @throws InterruptedException if the write is interrupted
+   */
+  @Override
+  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+    LOGGER.info("map key: {}, value: {}", key, value);
+    context.write(value, NullWritable.get());
+  }
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityReducer.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityReducer.java
new file mode 100644
index 00000000000..104a88225f7
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/IdentityReducer.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Pass-through reducer: logs each key together with its grouped values and
+ * emits the key exactly once, paired with the {@link NullWritable} singleton.
+ * The values are logged but never iterated.
+ */
+// The type arguments are required: against the raw Reducer type,
+// reduce(Text, Iterable, Context) would not override Reducer.reduce and the
+// @Override annotation would be rejected by the compiler.
+// NOTE(review): the input value type is taken to be NullWritable, the only
+// value type this file imports and emits -- confirm against the job wiring.
+public class IdentityReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(IdentityReducer.class);
+
+  /**
+   * Logs the key and its values, then writes {@code (key, NullWritable)} to the context.
+   *
+   * @param key the grouped key; becomes the output key
+   * @param values the values grouped under {@code key}; logged only
+   * @param context task context that receives the output pair
+   * @throws IOException if the context fails to write the pair
+   * @throws InterruptedException if the write is interrupted
+   */
+  @Override
+  protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
+    LOGGER.info("reduce key: {}, value: {}", key, values);
+    context.write(key, NullWritable.get());
+  }
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
new file mode 100644
index 00000000000..379e60a4dc9
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Drives {@link LineRandomizerMapper} and {@link LineRandomizerReducer} together
+ * through an MRUnit {@link MapReduceDriver}, feeding one to four input lines and
+ * checking the result against a fixed list of expected output pairs.
+ */
+public class LineRandomizerMapperReducerTest extends Assert {
+
+  // NOTE(review): the driver and Pair are used as raw types here; presumably
+  // their type arguments were lost from this listing -- confirm.
+  private MapReduceDriver mapReduceDriver;
+
+  /** Builds a fresh driver around a new mapper and a new reducer before each test. */
+  @Before
+  public void setUp() {
+    mapReduceDriver = MapReduceDriver.newMapReduceDriver(new LineRandomizerMapper(), new LineRandomizerReducer());
+  }
+
+  /** Input pair: the given offset as key and the given line as value. */
+  private static Pair line(long offset, String text) {
+    return new Pair(new LongWritable(offset), new Text(text));
+  }
+
+  /** Expected output pair: the given line as key and the NullWritable singleton as value. */
+  private static Pair expected(String text) {
+    return new Pair(new Text(text), NullWritable.get());
+  }
+
+  /** A single input line is expected back unchanged. */
+  @Test
+  public void testMapReduce1Item() throws IOException {
+    mapReduceDriver.withInput(new LongWritable(0), new Text("hello"));
+    mapReduceDriver.withOutput(new Text("hello"), NullWritable.get());
+    mapReduceDriver.runTest();
+  }
+
+  /** Two input lines; the expected output lists them in a different order. */
+  @Test
+  public void testMapReduce2Items() throws IOException {
+    mapReduceDriver.withAll(Arrays.asList(line(0, "hello"), line(1, "world")));
+    mapReduceDriver.withAllOutput(Arrays.asList(expected("world"), expected("hello")));
+    mapReduceDriver.runTest();
+  }
+
+  /** Three input lines; the expected output lists them in a different order. */
+  @Test
+  public void testMapReduce3Items() throws IOException {
+    mapReduceDriver.withAll(Arrays.asList(line(0, "hello"), line(1, "world"), line(2, "nadja")));
+    mapReduceDriver.withAllOutput(Arrays.asList(expected("nadja"), expected("world"), expected("hello")));
+    mapReduceDriver.runTest();
+  }
+
+  /** Four input lines; the expected output lists them in a different order. */
+  @Test
+  public void testMapReduce4Items() throws IOException {
+    mapReduceDriver.withAll(Arrays.asList(
+        line(0, "hello"), line(1, "world"), line(2, "nadja"), line(3, "basti")));
+    mapReduceDriver.withAllOutput(Arrays.asList(
+        expected("nadja"), expected("world"), expected("basti"), expected("hello")));
+    mapReduceDriver.runTest();
+  }
+
+}
\ No newline at end of file
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MRUnitBase.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MRUnitBase.java
new file mode 100644
index 00000000000..6238c225748
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MRUnitBase.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
+import org.apache.solr.util.ExternalPaths;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * Base class for tests that need a zipped Solr home and a morphline config
+ * file. The zip is built once per test class; morphline files are copied into
+ * a per-call temp directory with their placeholders substituted.
+ */
+public abstract class MRUnitBase extends SolrTestCaseJ4 {
+
+  protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files";
+  protected static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
+  protected static File solrHomeZip;
+
+  /** Zips the {@code solr/mrunit} resource directory and asserts that a zip was produced. */
+  @BeforeClass
+  public static void setupClass() throws Exception {
+    solrHomeZip = SolrOutputFormat.createSolrHomeZip(new File(RESOURCES_DIR + "/solr/mrunit"));
+    assertNotNull(solrHomeZip);
+  }
+
+  /** Deletes the zip created by {@link #setupClass()}, if one was created. */
+  @AfterClass
+  public static void teardownClass() throws Exception {
+    // setupClass may have failed before assigning the zip; a NullPointerException
+    // here would hide that original failure.
+    if (solrHomeZip != null) {
+      solrHomeZip.delete();
+    }
+  }
+
+  /**
+   * Points {@code config} at the Solr home zip and at a freshly materialized
+   * copy of the {@code solrCellDocumentTypes} morphline.
+   *
+   * @param config Hadoop configuration to populate
+   * @throws IOException if the temp directory or any file in it cannot be created
+   */
+  protected void setupHadoopConfig(Configuration config) throws IOException {
+    config.set(SolrOutputFormat.ZIP_NAME, solrHomeZip.getName());
+
+    String tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis();
+    // forceMkdir reports a failed creation as an IOException instead of the
+    // silently ignored boolean returned by File.mkdirs()
+    FileUtils.forceMkdir(new File(tempDir));
+    FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
+
+    setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes");
+
+    config.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, tempDir + "/test-morphlines/solrCellDocumentTypes.conf");
+  }
+
+  /**
+   * Copies {@code RESOURCES_DIR/<file>.conf} to {@code <tempDir>/<file>.conf},
+   * replacing every {@code RESOURCES_DIR} token with the absolute path of
+   * {@code tempDir} and every <code>${SOLR_LOCATOR}</code> token with a fixed locator.
+   *
+   * @param tempDir directory that receives the rewritten config
+   * @param file config path relative to the resources directory, without the {@code .conf} suffix
+   * @throws IOException if the config cannot be read or written
+   */
+  public static void setupMorphline(String tempDir, String file) throws IOException {
+    String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8");
+    // Literal replace on purpose: replaceAll would interpret '\' and '$' inside
+    // the absolute path as escape and group-reference syntax.
+    morphlineText = morphlineText.replace("RESOURCES_DIR", new File(tempDir).getAbsolutePath());
+    morphlineText = morphlineText.replace("${SOLR_LOCATOR}", "{ collection : collection1 }");
+
+    FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8");
+  }
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
new file mode 100644
index 00000000000..a292a1b0d39
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
@@ -0,0 +1,463 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.hadoop; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.Collections; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver; +import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MapReduceIndexerToolArgumentParserTest extends LuceneTestCase { + + private Configuration conf; + private MapReduceIndexerTool.MyArgumentParser parser; + private MapReduceIndexerTool.Options opts; + private PrintStream oldSystemOut; + private PrintStream oldSystemErr; + private ByteArrayOutputStream bout; + private ByteArrayOutputStream berr; + + private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files"; + private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + + private static final String MORPHLINE_FILE = RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf"; + + private static final Logger LOG = LoggerFactory.getLogger(MapReduceIndexerToolArgumentParserTest.class); + + + private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineGoLiveMiniMRTest.class.getName()); + + @Before + public void setUp() throws Exception { + super.setUp(); + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory); + + conf = new Configuration(); + parser = new MapReduceIndexerTool.MyArgumentParser(); + opts = new MapReduceIndexerTool.Options(); 
+ oldSystemOut = System.out; + bout = new ByteArrayOutputStream(); + System.setOut(new PrintStream(bout, true, "UTF-8")); + oldSystemErr = System.err; + berr = new ByteArrayOutputStream(); + System.setErr(new PrintStream(berr, true, "UTF-8")); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + System.setOut(oldSystemOut); + System.setErr(oldSystemErr); + } + + @Test + public void testArgsParserTypicalUse() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--morphline-id", "morphline_xyz", + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--mappers", "10", + "--reducers", "9", + "--fanout", "8", + "--max-segments", "7", + "--shards", "1", + "--verbose", + "file:///home", + "file:///dev", + }; + Integer res = parser.parseArgs(args, conf, opts); + assertNull(res != null ? res.toString() : "", res); + assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEquals(10, opts.mappers); + assertEquals(9, opts.reducers); + assertEquals(8, opts.fanout); + assertEquals(7, opts.maxSegments); + assertEquals(new Integer(1), opts.shards); + assertEquals(null, opts.fairSchedulerPool); + assertTrue(opts.isVerbose); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(RetainMostRecentUpdateConflictResolver.class.getName(), opts.updateConflictResolver); + assertEquals(MORPHLINE_FILE, opts.morphlineFile.getPath()); + assertEquals("morphline_xyz", opts.morphlineId); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserMultipleSpecsOfSameKind() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--input-list", "file:///", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", 
"file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "file:///home", + "file:///dev", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserTypicalUseWithEqualsSign() { + String[] args = new String[] { + "--input-list=file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir=file:/tmp/foo", + "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), + "--mappers=10", + "--shards", "1", + "--verbose", + "file:///home", + "file:///dev", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEquals(10, opts.mappers); + assertEquals(new Integer(1), opts.shards); + assertEquals(null, opts.fairSchedulerPool); + assertTrue(opts.isVerbose); + assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserMultipleSpecsOfSameKindWithEqualsSign() { + String[] args = new String[] { + "--input-list=file:///tmp", + "--input-list=file:///", + "--morphline-file", MORPHLINE_FILE, + "--output-dir=file:/tmp/foo", + "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "file:///home", + "file:///dev", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists); + assertEquals(Arrays.asList(new 
Path("file:///home"), new Path("file:///dev")), opts.inputFiles); + assertEquals(new Path("file:/tmp/foo"), opts.outputDir); + assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserHelp() throws UnsupportedEncodingException { + String[] args = new String[] { "--help" }; + assertEquals(new Integer(0), parser.parseArgs(args, conf, opts)); + String helpText = new String(bout.toByteArray(), "UTF-8"); + assertTrue(helpText.contains("MapReduce batch job driver that ")); + assertTrue(helpText.contains("bin/hadoop command")); + assertEquals(0, berr.toByteArray().length); + } + + @Test + public void testArgsParserOk() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(new Integer(1), opts.shards); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserUpdateConflictResolver() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--update-conflict-resolver", NoChangeUpdateConflictResolver.class.getName(), + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(NoChangeUpdateConflictResolver.class.getName(), opts.updateConflictResolver); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsParserUnknownArgName() throws Exception { + String[] args = new String[] { + "--xxxxxxxxinputlist", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void 
testArgsParserFileNotFound1() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/fileNotFound/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserFileNotFound2() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", "/fileNotFound", + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserIntOutOfRange() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--mappers", "-20" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserIllegalFanout() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--fanout", "1" // must be >= 2 + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsParserSolrHomeMustContainSolrConfigFile() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--shards", "1", + "--solr-home-dir", "/", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsShardUrlOk() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", 
"http://localhost:8983/solr/collection2", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEquals(Arrays.asList( + Collections.singletonList("http://localhost:8983/solr/collection1"), + Collections.singletonList("http://localhost:8983/solr/collection2")), + opts.shardUrls); + assertEquals(new Integer(2), opts.shards); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsShardUrlMustHaveAParam() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsShardUrlAndShardsSucceeds() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shards", "1", + "--shard-url", "http://localhost:8983/solr/collection1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsShardUrlNoGoLive() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1" + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + assertEquals(new Integer(1), opts.shards); + } + + @Test + public void testArgsShardUrlsAndZkhostAreMutuallyExclusive() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", "http://localhost:8983/solr/collection1", + 
"--zk-host", "http://localhost:2185", + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveAndSolrUrl() { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--shard-url", "http://localhost:8983/solr/collection1", + "--shard-url", "http://localhost:8983/solr/collection1", + "--go-live" + }; + Integer result = parser.parseArgs(args, conf, opts); + assertNull(result); + assertEmptySystemErrAndEmptySystemOut(); + } + + @Test + public void testArgsZkHostNoGoLive() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--zk-host", "http://localhost:2185", + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveZkHostNoCollection() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--zk-host", "http://localhost:2185", + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testArgsGoLiveNoZkHostOrSolrUrl() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), + "--go-live" + }; + assertArgumentParserException(args); + } + + @Test + public void testNoSolrHomeDirOrZKHost() throws Exception { + String[] args = new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--shards", "1", + }; + assertArgumentParserException(args); + } + + @Test + public void testZKHostNoSolrHomeDirOk() { + String[] args 
= new String[] { + "--input-list", "file:///tmp", + "--morphline-file", MORPHLINE_FILE, + "--output-dir", "file:/tmp/foo", + "--zk-host", "http://localhost:2185", + "--collection", "collection1", + }; + assertNull(parser.parseArgs(args, conf, opts)); + assertEmptySystemErrAndEmptySystemOut(); + } + + private void assertEmptySystemErrAndEmptySystemOut() { + assertEquals(0, bout.toByteArray().length); + assertEquals(0, berr.toByteArray().length); + } + + private void assertArgumentParserException(String[] args) throws UnsupportedEncodingException { + assertEquals("should have returned fail code", new Integer(1), parser.parseArgs(args, conf, opts)); + assertEquals("no sys out expected:" + new String(bout.toByteArray(), "UTF-8"), 0, bout.toByteArray().length); + String usageText; + usageText = new String(berr.toByteArray(), "UTF-8"); + + assertTrue("should start with usage msg \"usage: hadoop \":" + usageText, usageText.startsWith("usage: hadoop ")); + } + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java new file mode 100644 index 00000000000..49891290b0a --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.lang.reflect.Array; +import java.util.Arrays; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.util.JarFinder; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.hadoop.hack.MiniMRCluster; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import 
com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+
+/**
+ * Integration test that starts an in-process {@link MiniDFSCluster} and
+ * {@link MiniMRCluster} once per class and runs {@link MapReduceIndexerTool}
+ * against them ({@link #mrRun()}), plus URL/path resolution checks for
+ * {@code PathParts} ({@link #testPathParts()}).
+ *
+ * Each test instance picks one of three sample Avro input files at random,
+ * together with the number of documents that file is expected to produce.
+ */
+@ThreadLeakAction({Action.WARN})
+@ThreadLeakLingering(linger = 0)
+@ThreadLeakZombies(Consequence.CONTINUE)
+@ThreadLeakScope(Scope.NONE)
+@Slow
+public class MorphlineBasicMiniMRTest extends SolrTestCaseJ4 {
+
+  private static final boolean ENABLE_LOCAL_JOB_RUNNER = false; // for debugging only
+  private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files";
+  private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
+  private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr");
+
+  // Jar handed to the job via JobConf.setJar() in mrRun()
+  private static final String SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class);
+
+  // Clusters are shared by all tests of this class; see setupClass()/teardownClass()
+  private static MiniDFSCluster dfsCluster = null;
+  private static MiniMRCluster mrCluster = null;
+  // Number of completed mrRun() invocations; selects the command line variant used by mrRun()
+  private static int numRuns = 0;
+
+  // Sample file chosen in the constructor and the document count expected for it
+  private final String inputAvroFile;
+  private final int count;
+
+  // Scratch directory that receives custom-mimetypes.xml and the morphline setup
+  private static String tempDir;
+
+  private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineBasicMiniMRTest.class.getName());
+
+  /** Factory for the tool under test; protected so that subclasses can supply a different instance. */
+  protected MapReduceIndexerTool createTool() {
+    return new MapReduceIndexerTool();
+  }
+
+  /** Randomly selects one of the three sample Avro files and its expected document count. */
+  public MorphlineBasicMiniMRTest() {
+    int data = random().nextInt(3);
+    switch (data) {
+      case 0:
+        this.inputAvroFile = "sample-statuses-20120906-141433.avro";
+        this.count = 2;
+        break;
+      case 1:
+        this.inputAvroFile = "sample-statuses-20120521-100919.avro";
+        this.count = 20;
+        break;
+      case 2:
+        this.inputAvroFile = "sample-statuses-20120906-141433-medium.avro";
+        this.count = 2104;
+        break;
+      default:
+        throw new RuntimeException("Test setup is broken");
+    }
+
+  }
+
+  /**
+   * Prepares the Solr home and morphline files, then starts the DFS and MR
+   * mini clusters shared by all tests of this class.
+   *
+   * The test is skipped when a security manager is configured or when HDFS
+   * tests are disabled via {@code -Dtests.disableHdfs}.
+   */
+  @BeforeClass
+  public static void setupClass() throws Exception {
+    LuceneTestCase.assumeTrue(
+        "Currently this test can only be run without the lucene test security policy in place",
+        System.getProperty("java.security.manager", "").equals(""));
+
+    LuceneTestCase.assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
+        Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false")));
+
+    AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
+    FileUtils.copyDirectory(MINIMR_CONF_DIR, solrHomeDirectory);
+
+    tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis();
+    new File(tempDir).mkdirs();
+    FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
+
+    MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes");
+
+    System.setProperty("hadoop.log.dir", new File(solrHomeDirectory, "logs").getAbsolutePath());
+
+    int taskTrackers = 1;
+    int dataNodes = 2;
+//    String proxyUser = System.getProperty("user.name");
+//    String proxyGroup = "g";
+//    StringBuilder sb = new StringBuilder();
+//    sb.append("127.0.0.1,localhost");
+//    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
+//      sb.append(",").append(i.getCanonicalHostName());
+//    }
+
+    // NOTE(review): dataDir is declared outside this class; presumably createTempDir()
+    // initialises it, which is why it is called before the first use below - confirm.
+    createTempDir();
+    new File(dataDir, "nm-local-dirs").mkdirs();
+
+    System.setProperty("solr.hdfs.blockcache.enabled", "false");
+
+    System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir");
+    System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build");
+    System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache");
+
+    JobConf conf = new JobConf();
+    conf.set("dfs.block.access.token.enable", "false");
+    conf.set("dfs.permissions", "true");
+    conf.set("hadoop.security.authentication", "simple");
+    conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir.getPath() + File.separator + "nm-local-dirs");
+    // NM_LOG_DIRS is the property key; DEFAULT_NM_LOG_DIRS is only the default *value*
+    // and using it as a key would set a property named after that default path.
+    conf.set(YarnConfiguration.NM_LOG_DIRS, dataDir + File.separator + "nm-logs");
+    conf.set("testWorkDir", dataDir.getPath() + File.separator + "testWorkDir");
+
+    dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+    FileSystem fileSystem = dfsCluster.getFileSystem();
+    fileSystem.mkdirs(new Path("/tmp"));
+    fileSystem.mkdirs(new Path("/user"));
+    fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
+    fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
+    fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
+    fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
+    String nnURI = fileSystem.getUri().toString();
+    int numDirs = 1;
+    String[] racks = null;
+    String[] hosts = null;
+
+    mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
+    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
+  }
+
+  /** Clears the system properties set by {@link #setupClass()} and shuts both mini clusters down. */
+  @AfterClass
+  public static void teardownClass() throws Exception {
+    // hadoop.log.dir is set once per class, so it must also be cleared once per class;
+    // otherwise it leaks into later test classes when no test method (and thus no tearDown()) ran.
+    System.clearProperty("hadoop.log.dir");
+    System.clearProperty("solr.hdfs.blockcache.enabled");
+    System.clearProperty("test.build.dir");
+    System.clearProperty("test.build.data");
+    System.clearProperty("test.cache.data");
+    if (mrCluster != null) {
+      mrCluster.shutdown();
+      mrCluster = null;
+    }
+    if (dfsCluster != null) {
+      dfsCluster.shutdown();
+      dfsCluster = null;
+    }
+  }
+
+  /**
+   * Per-test cleanup.
+   *
+   * NOTE(review): both properties cleared here are set only once in setupClass(), so they are
+   * absent for every test after the first one - confirm whether that is intended.
+   */
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    System.clearProperty("hadoop.log.dir");
+    System.clearProperty("solr.hdfs.blockcache.enabled");
+
+    super.tearDown();
+  }
+
+  /** Returns a fresh job configuration created by the running MR mini cluster. */
+  private JobConf getJobConf() {
+    return mrCluster.createJobConf();
+  }
+
+  /**
+   * Verifies how PathParts resolves fully qualified hdfs:// URLs, absolute paths and
+   * relative paths, with and without a query/fragment suffix and a "../" path segment.
+   */
+  @Test
+  public void testPathParts() throws Exception { // see PathParts
+    FileSystem fs = dfsCluster.getFileSystem();
+    int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
+    assertTrue(dfsClusterPort > 0);
+    JobConf jobConf = getJobConf();
+    Configuration simpleConf = new Configuration();
+
+    // Fully qualified URLs: expected to resolve the same way with either configuration
+    for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
+      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
+        for (String up : Arrays.asList("", "../")) {
+          // with "../" the "foo/" segment is expected to be normalized away
+          String down = up.length() == 0 ? "foo/" : "";
+          String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
+          PathParts parts = new PathParts(uploadURL, conf);
+          assertEquals(uploadURL, parts.getUploadURL());
+          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
+          assertEquals("bar.txt", parts.getName());
+          assertEquals("hdfs", parts.getScheme());
+          assertEquals("localhost", parts.getHost());
+          assertEquals(12345, parts.getPort());
+          assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
+          assertEquals(parts.getId(), parts.getDownloadURL());
+          assertFileNotFound(parts);
+
+          // no explicit port in the URL: port 8020 is expected
+          uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
+          parts = new PathParts(uploadURL, conf);
+          assertEquals(uploadURL, parts.getUploadURL());
+          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
+          assertEquals("bar.txt", parts.getName());
+          assertEquals("hdfs", parts.getScheme());
+          assertEquals("localhost", parts.getHost());
+          assertEquals(8020, parts.getPort());
+          assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
+          assertEquals(parts.getId(), parts.getDownloadURL());
+          assertFileNotFound(parts);
+        }
+      }
+    }
+
+    // Scheme-less paths: only checked with the mini cluster's configuration
+    for (Configuration conf : Arrays.asList(jobConf)) {
+      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
+        for (String up : Arrays.asList("", "../")) {
+          // verify using absolute path
+          String down = up.length() == 0 ? "foo/" : "";
+          String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
+          PathParts parts = new PathParts(uploadURL, conf);
+          assertEquals(uploadURL, parts.getUploadURL());
+          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
+          assertEquals("bar.txt", parts.getName());
+          assertEquals("hdfs", parts.getScheme());
+          assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
+          assertEquals(dfsClusterPort, parts.getPort());
+          assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
+                  || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + "/user/" + down + "bar.txt")
+          );
+          assertFileNotFound(parts);
+
+          // verify relative path is interpreted to be relative to user's home dir and resolved to an absolute path
+          uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
+          parts = new PathParts(uploadURL, conf);
+          assertEquals(uploadURL, parts.getUploadURL());
+          String homeDir = "/user/" + System.getProperty("user.name");
+          assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
+          assertEquals("bar.txt", parts.getName());
+          assertEquals("hdfs", parts.getScheme());
+          assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
+          assertEquals(dfsClusterPort, parts.getPort());
+          assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
+                  || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
+          );
+          assertFileNotFound(parts);
+        }
+      }
+    }
+
+    try {
+      new PathParts("/user/foo/bar.txt", simpleConf);
+      fail("host/port resolution requires minimr conf, not a simple conf");
+    } catch (IllegalArgumentException e) {
+      ; // expected
+    }
+  }
+
+  /** Asserts that looking up the upload path of {@code parts} fails with an IOException. */
+  private void assertFileNotFound(PathParts parts) {
+    try {
+      parts.getFileSystem().getFileStatus(parts.getUploadPath());
+      fail();
+    } catch (IOException e) {
+      ; // expected
+    }
+  }
+
+  /**
+   * Uploads the selected Avro file to the DFS mini cluster, runs MapReduceIndexerTool on it,
+   * validates the job result and the indexed document count, and finally repeats the run
+   * with {@code --dry-run}. The command line varies with {@code numRuns}.
+   */
+  @Test
+  public void mrRun() throws Exception {
+    FileSystem fs = dfsCluster.getFileSystem();
+    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
+    fs.delete(inDir, true);
+    String DATADIR = "/user/testing/testMapperReducer/data";
+    // local variable; hides the dataDir used in setupClass() for the rest of this method
+    Path dataDir = fs.makeQualified(new Path(DATADIR));
+    fs.delete(dataDir, true);
+    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
+    fs.delete(outDir, true);
+
+    assertTrue(fs.mkdirs(inDir));
+    // input list file: a single line naming the data file to index
+    Path INPATH = new Path(inDir, "input.txt");
+    OutputStream os = fs.create(INPATH);
+    Writer wr = new OutputStreamWriter(os, "UTF-8");
+    try {
+      wr.write(DATADIR + "/" + inputAvroFile);
+    } finally {
+      wr.close(); // also closes the underlying DFS stream when the write fails
+    }
+
+    assertTrue(fs.mkdirs(dataDir));
+    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);
+
+    JobConf jobConf = getJobConf();
+    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints
+      jobConf.set("mapred.job.tracker", "local");
+    }
+    jobConf.setMaxMapAttempts(1);
+    jobConf.setMaxReduceAttempts(1);
+    jobConf.setJar(SEARCH_ARCHIVES_JAR);
+    jobConf.setBoolean("ignoreTikaException", false);
+
+    int shards = 2;
+    int maxReducers = Integer.MAX_VALUE;
+    if (ENABLE_LOCAL_JOB_RUNNER) {
+      // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
+      // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
+      maxReducers = 1;
+      shards = 1;
+    }
+
+    String[] args = new String[] {
+        "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
+        "--morphline-id=morphline1",
+        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
+        "--output-dir=" + outDir.toString(),
+        "--shards=" + shards,
+        "--verbose",
+        numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
+        numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
+    };
+    if (numRuns % 3 == 2) {
+      args = concat(args, new String[] {"--fanout=2"});
+    }
+    if (numRuns == 0) {
+      // force (slow) MapReduce based randomization to get coverage for that as well
+      args = concat(new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"}, args);
+    }
+    MapReduceIndexerTool tool = createTool();
+    int res = ToolRunner.run(jobConf, tool, args);
+    assertEquals(0, res);
+    Job job = tool.job;
+    assertTrue(job.isComplete());
+    assertTrue(job.isSuccessful());
+
+    if (numRuns % 3 != 2) {
+      // Only run this check if mtree merge is disabled.
+      // With mtree merge enabled the BatchWriter counters aren't available anymore because
+      // variable "job" now refers to the merge job rather than the indexing job
+      assertEquals("Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
+          count, job.getCounters().findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue());
+    }
+
+    // Check the output is as expected
+    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
+    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
+
+    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
+
+    UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);
+
+    // run again with --dryrun mode:
+    tool = createTool();
+    args = concat(args, new String[] {"--dry-run"});
+    res = ToolRunner.run(jobConf, tool, args);
+    assertEquals(0, res);
+
+    numRuns++;
+  }
+
+  /**
+   * Concatenates the given arrays, in order, into one new array.
+   *
+   * @param arrays one or more arrays to join; none of them is modified
+   * @return a new array whose component type is taken from the last argument array
+   * @throws IllegalArgumentException if no array is passed
+   */
+  @SuppressWarnings("unchecked")
+  protected static <T> T[] concat(T[]... arrays) {
+    if (arrays.length <= 0) {
+      throw new IllegalArgumentException();
+    }
+    Class<?> clazz = null;
+    int length = 0;
+    for (T[] array : arrays) {
+      clazz = array.getClass();
+      length += array.length;
+    }
+    // reflective allocation so that the result has the arguments' runtime component type
+    T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length);
+    int pos = 0;
+    for (T[] array : arrays) {
+      System.arraycopy(array, 0, result, pos, array.length);
+      pos += array.length;
+    }
+    return result;
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
new file mode 100644
index 00000000000..32f24980fa0
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
@@ -0,0 +1,730 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.hadoop; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.lang.reflect.Array; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.util.JarFinder; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkCoreNodeProps; +import org.apache.solr.common.params.CollectionParams.CollectionAction; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.hadoop.hack.MiniMRClientCluster; +import 
org.apache.solr.hadoop.hack.MiniMRClientClusterFactory; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; + +@ThreadLeakAction({Action.WARN}) +@ThreadLeakLingering(linger = 0) +@ThreadLeakZombies(Consequence.CONTINUE) +@ThreadLeakScope(Scope.NONE) +@SuppressCodecs({"Lucene3x", "Lucene40"}) +@Slow +public class MorphlineGoLiveMiniMRTest extends AbstractFullDistribZkTestBase { + + private static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files"; + private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents"; + private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr"); + + private static final String SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class); + + private static MiniDFSCluster dfsCluster = null; + private static MiniMRClientCluster mrCluster = null; + private static int numRuns = 0; + private static String tempDir; + + private final String inputAvroFile1; + private final String inputAvroFile2; + private final String inputAvroFile3; + + private static final File solrHomeDirectory = new File(TEMP_DIR, MorphlineGoLiveMiniMRTest.class.getName()); + + @Override + public String getSolrHome() { 
+ return solrHomeDirectory.getPath(); + } + + public MorphlineGoLiveMiniMRTest() { + this.inputAvroFile1 = "sample-statuses-20120521-100919.avro"; + this.inputAvroFile2 = "sample-statuses-20120906-141433.avro"; + this.inputAvroFile3 = "sample-statuses-20120906-141433-medium.avro"; + + fixShardCount = true; + sliceCount = TEST_NIGHTLY ? 3 : 3; + shardCount = TEST_NIGHTLY ? 3 : 3; + } + + private static boolean isYarn() { + try { + Job.class.getMethod("getCluster"); + return true; + } catch (NoSuchMethodException e) { + return false; + } + } + + @BeforeClass + public static void setupClass() throws Exception { + LuceneTestCase.assumeTrue( + "Currently this test can only be run without the lucene test security policy in place", + System.getProperty("java.security.manager", "").equals("")); + + LuceneTestCase.assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs", + Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false"))); + + AbstractZkTestCase.SOLRHOME = solrHomeDirectory; + FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory); + + tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis(); + new File(tempDir).mkdirs(); + FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml")); + + MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes"); + + + System.setProperty("hadoop.log.dir", new File(dataDir, "logs").getAbsolutePath()); + + int taskTrackers = 2; + int dataNodes = 2; + + System.setProperty("solr.hdfs.blockcache.enabled", "false"); + + JobConf conf = new JobConf(); + conf.set("dfs.block.access.token.enable", "false"); + conf.set("dfs.permissions", "true"); + conf.set("hadoop.security.authentication", "simple"); + + conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir + File.separator + "nm-local-dirs"); + conf.set(YarnConfiguration.DEFAULT_NM_LOG_DIRS, dataDir + File.separator + "nm-logs"); + + + createTempDir(); + new File(dataDir + 
File.separator + "nm-local-dirs").mkdirs(); + + System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir"); + System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build"); + System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache"); + + dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null); + FileSystem fileSystem = dfsCluster.getFileSystem(); + fileSystem.mkdirs(new Path("/tmp")); + fileSystem.mkdirs(new Path("/user")); + fileSystem.mkdirs(new Path("/hadoop/mapred/system")); + fileSystem.setPermission(new Path("/tmp"), + FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/user"), + FsPermission.valueOf("-rwxrwxrwx")); + fileSystem.setPermission(new Path("/hadoop/mapred/system"), + FsPermission.valueOf("-rwx------")); + + mrCluster = MiniMRClientClusterFactory.create(MorphlineGoLiveMiniMRTest.class, 1, conf, new File(dataDir, "mrCluster")); + + //new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, + //hosts, null, conf); + + ProxyUsers.refreshSuperUserGroupsConfiguration(conf); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + System.setProperty("host", "127.0.0.1"); + System.setProperty("numShards", Integer.toString(sliceCount)); + URI uri = dfsCluster.getFileSystem().getUri(); + System.setProperty("solr.hdfs.home", uri.toString() + "/" + this.getClass().getName()); + uploadConfFiles(); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + System.clearProperty("host"); + System.clearProperty("numShards"); + System.clearProperty("solr.hdfs.home"); + } + + @AfterClass + public static void teardownClass() throws Exception { + System.clearProperty("solr.hdfs.blockcache.enabled"); + System.clearProperty("hadoop.log.dir"); + System.clearProperty("test.build.dir"); + System.clearProperty("test.build.data"); + 
System.clearProperty("test.cache.data"); + + if (mrCluster != null) { + //mrCluster.shutdown(); + mrCluster = null; + } + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + FileSystem.closeAll(); + } + + private JobConf getJobConf() throws IOException { + JobConf jobConf = new JobConf(mrCluster.getConfig()); + return jobConf; + } + + @Test + @Override + public void testDistribSearch() throws Exception { + super.testDistribSearch(); + } + + @Test + public void testBuildShardUrls() throws Exception { + // 2x3 + Integer numShards = 2; + List urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + urls.add("shard4"); + urls.add("shard5"); + urls.add("shard6"); + List> shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 2, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(3, u.size()); + } + + // 1x6 + numShards = 1; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 1, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(6, u.size()); + } + + // 6x1 + numShards = 6; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 6, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // 3x2 + numShards = 3; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 3, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(2, u.size()); + } + + // null shards, 6x1 + numShards = null; + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 6, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // null shards 3x1 + numShards = null; + + urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + + shardUrls = 
MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 3, shardUrls.size()); + + for (List u : shardUrls) { + assertEquals(1, u.size()); + } + + // 2x(2,3) off balance + numShards = 2; + urls = new ArrayList(); + urls.add("shard1"); + urls.add("shard2"); + urls.add("shard3"); + urls.add("shard4"); + urls.add("shard5"); + shardUrls = MapReduceIndexerTool.buildShardUrls(urls , numShards); + + assertEquals(shardUrls.toString(), 2, shardUrls.size()); + + Set counts = new HashSet(); + counts.add(shardUrls.get(0).size()); + counts.add(shardUrls.get(1).size()); + + assertTrue(counts.contains(2)); + assertTrue(counts.contains(3)); + } + + private String[] prependInitialArgs(String[] args) { + String[] head = new String[] { + "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf", + "--morphline-id=morphline1", + }; + return concat(head, args); + } + + @Override + public void doTest() throws Exception { + + waitForRecoveriesToFinish(false); + + FileSystem fs = dfsCluster.getFileSystem(); + Path inDir = fs.makeQualified(new Path( + "/user/testing/testMapperReducer/input")); + fs.delete(inDir, true); + String DATADIR = "/user/testing/testMapperReducer/data"; + Path dataDir = fs.makeQualified(new Path(DATADIR)); + fs.delete(dataDir, true); + Path outDir = fs.makeQualified(new Path( + "/user/testing/testMapperReducer/output")); + fs.delete(outDir, true); + + assertTrue(fs.mkdirs(inDir)); + Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1); + + JobConf jobConf = getJobConf(); + // enable mapred.job.tracker = local to run in debugger and set breakpoints + // jobConf.set("mapred.job.tracker", "local"); + jobConf.setMaxMapAttempts(1); + jobConf.setMaxReduceAttempts(1); + jobConf.setJar(SEARCH_ARCHIVES_JAR); + jobConf.setBoolean("ignoreTikaException", false); + + MapReduceIndexerTool tool; + int res; + QueryResponse results; + HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url); + 
String[] args = new String[]{}; + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--log4j=" + ExternalPaths.SOURCE_HOME + "/core/src/test-files/log4j.properties", + "--mappers=3", + ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(), + "--go-live-threads", Integer.toString(random().nextInt(15) + 1), + "--verbose", + "--go-live" + }; + args = prependInitialArgs(args); + List argList = new ArrayList(); + getShardUrlArgs(argList); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + + res = ToolRunner.run(jobConf, tool, args); + + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + results = server.query(new SolrQuery("*:*")); + assertEquals(20, results.getResults().getNumFound()); + } + + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(inDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--verbose", + "--go-live", + ++numRuns % 2 == 0 ? 
"--input-list=" + INPATH.toString() : dataDir.toString(), + "--go-live-threads", Integer.toString(random().nextInt(15) + 1) + }; + args = prependInitialArgs(args); + argList = new ArrayList(); + getShardUrlArgs(argList); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + results = server.query(new SolrQuery("*:*")); + + assertEquals(22, results.getResults().getNumFound()); + } + + // try using zookeeper + String collection = "collection1"; + if (random().nextBoolean()) { + // sometimes, use an alias + createAlias("updatealias", "collection1"); + collection = "updatealias"; + } + + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + args = new String[] { + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--reducers=6", + "--verbose", + "--go-live", + ++numRuns % 2 == 0 ? 
"--input-list=" + INPATH.toString() : dataDir.toString(), + "--zk-host", zkServer.getZkAddress(), + "--collection", collection + }; + args = prependInitialArgs(args); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + results = server.query(new SolrQuery("*:*")); + assertEquals(2126, results.getResults().getNumFound()); + } + + server.shutdown(); + + // try using zookeeper with replication + String replicatedCollection = "replicated_collection"; + createCollection(replicatedCollection, 2, 3, 2); + waitForRecoveriesToFinish(false); + cloudClient.setDefaultCollection(replicatedCollection); + fs.delete(inDir, true); + fs.delete(outDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(dataDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + "--mappers=3", + "--reducers=6", + "--verbose", + "--go-live", + "--zk-host", zkServer.getZkAddress(), + "--collection", replicatedCollection, dataDir.toString() + }; + args = prependInitialArgs(args); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + results = cloudClient.query(new SolrQuery("*:*")); + assertEquals(2104, results.getResults().getNumFound()); + + checkConsistency(replicatedCollection); + } + + // try using solr_url with replication + cloudClient.deleteByQuery("*:*"); + cloudClient.commit(); + fs.delete(inDir, true); + fs.delete(dataDir, true); + assertTrue(fs.mkdirs(dataDir)); + INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3); + + args = new String[] { + "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(), + "--output-dir=" + outDir.toString(), + 
"--shards", "2", + "--mappers=3", + "--verbose", + "--go-live", + "--go-live-threads", Integer.toString(random().nextInt(15) + 1), dataDir.toString() + }; + args = prependInitialArgs(args); + + argList = new ArrayList(); + getShardUrlArgs(argList, replicatedCollection); + args = concat(args, argList.toArray(new String[0])); + + if (true) { + tool = new MapReduceIndexerTool(); + res = ToolRunner.run(jobConf, tool, args); + assertEquals(0, res); + assertTrue(tool.job.isComplete()); + assertTrue(tool.job.isSuccessful()); + + checkConsistency(replicatedCollection); + + results = cloudClient.query(new SolrQuery("*:*")); + assertEquals(2104, results.getResults().getNumFound()); + } + + } + + private void getShardUrlArgs(List args) { + for (int i = 0; i < shardCount; i++) { + args.add("--shard-url"); + args.add(cloudJettys.get(i).url); + } + } + + private void checkConsistency(String replicatedCollection) + throws SolrServerException { + Collection slices = cloudClient.getZkStateReader().getClusterState() + .getSlices(replicatedCollection); + for (Slice slice : slices) { + Collection replicas = slice.getReplicas(); + long found = -1; + for (Replica replica : replicas) { + HttpSolrServer client = new HttpSolrServer( + new ZkCoreNodeProps(replica).getCoreUrl()); + SolrQuery query = new SolrQuery("*:*"); + query.set("distrib", false); + QueryResponse replicaResults = client.query(query); + long count = replicaResults.getResults().getNumFound(); + if (found != -1) { + assertEquals(slice.getName() + " is inconsistent " + + new ZkCoreNodeProps(replica).getCoreUrl(), found, count); + } + found = count; + } + } + } + + private void getShardUrlArgs(List args, String replicatedCollection) { + Collection slices = cloudClient.getZkStateReader().getClusterState().getSlices(replicatedCollection); + for (Slice slice : slices) { + Collection replicas = slice.getReplicas(); + for (Replica replica : replicas) { + args.add("--shard-url"); + args.add(new 
ZkCoreNodeProps(replica).getCoreUrl()); + } + } + } + + private Path upAvroFile(FileSystem fs, Path inDir, String DATADIR, + Path dataDir, String localFile) throws IOException, UnsupportedEncodingException { + Path INPATH = new Path(inDir, "input.txt"); + OutputStream os = fs.create(INPATH); + Writer wr = new OutputStreamWriter(os, "UTF-8"); + wr.write(DATADIR + File.separator + localFile); + wr.close(); + + assertTrue(fs.mkdirs(dataDir)); + fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, localFile), dataDir); + return INPATH; + } + + @Override + public JettySolrRunner createJetty(File solrHome, String dataDir, + String shardList, String solrConfigOverride, String schemaOverride) + throws Exception { + + JettySolrRunner jetty = new JettySolrRunner(solrHome.getAbsolutePath(), + context, 0, solrConfigOverride, schemaOverride); + + jetty.setShards(shardList); + + if (System.getProperty("collection") == null) { + System.setProperty("collection", "collection1"); + } + + jetty.start(); + + System.clearProperty("collection"); + + return jetty; + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String name) throws Exception { + putConfig(zkClient, solrhome, name, name); + } + + private static void putConfig(SolrZkClient zkClient, File solrhome, String srcName, String destName) + throws Exception { + + File file = new File(solrhome, "conf" + File.separator + srcName); + if (!file.exists()) { + // LOG.info("skipping " + file.getAbsolutePath() + + // " because it doesn't exist"); + return; + } + + String destPath = "/configs/conf1/" + destName; + // LOG.info("put " + file.getAbsolutePath() + " to " + destPath); + zkClient.makePath(destPath, file, false, true); + } + + private void uploadConfFiles() throws Exception { + // upload our own config files + SolrZkClient zkClient = new SolrZkClient(zkServer.getZkAddress(), 10000); + putConfig(zkClient, new File(RESOURCES_DIR + "/solr/solrcloud"), + "solrconfig.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, 
"schema.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "elevate.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_en.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ar.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_bg.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ca.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_cz.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_da.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_el.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_es.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_eu.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_de.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_fa.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_fi.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_fr.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ga.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_gl.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hi.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hu.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_hy.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_id.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_it.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ja.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_lv.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_nl.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_no.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_pt.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ro.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_ru.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_sv.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stopwords_th.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, 
"lang/stopwords_tr.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_ca.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_fr.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_ga.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "lang/contractions_it.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/stemdict_nl.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "lang/hyphenations_ga.txt"); + + putConfig(zkClient, MINIMR_CONF_DIR, "stopwords.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "protwords.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "currency.xml"); + putConfig(zkClient, MINIMR_CONF_DIR, "open-exchange-rates.json"); + putConfig(zkClient, MINIMR_CONF_DIR, "mapping-ISOLatin1Accent.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "old_synonyms.txt"); + putConfig(zkClient, MINIMR_CONF_DIR, "synonyms.txt"); + zkClient.close(); + } + + protected static T[] concat(T[]... arrays) { + if (arrays.length <= 0) { + throw new IllegalArgumentException(); + } + Class clazz = null; + int length = 0; + for (T[] array : arrays) { + clazz = array.getClass(); + length += array.length; + } + T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length); + int pos = 0; + for (T[] array : arrays) { + System.arraycopy(array, 0, result, pos, array.length); + pos += array.length; + } + return result; + } + + private NamedList createAlias(String alias, String collections) throws SolrServerException, IOException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("collections", collections); + params.set("name", alias); + params.set("action", CollectionAction.CREATEALIAS.toString()); + QueryRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + return cloudClient.request(request); + } + + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java new file mode 100644 
index 00000000000..3316caa0824 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineMapperTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.hadoop.morphline.MorphlineMapper; +import org.junit.Test; + +public class MorphlineMapperTest extends MRUnitBase { + + @Test + public void testMapper() throws Exception { + MorphlineMapper mapper = new MorphlineMapper(); + MapDriver mapDriver = MapDriver.newMapDriver(mapper);; + + Configuration config = mapDriver.getConfiguration(); + setupHadoopConfig(config); + + mapDriver.withInput(new LongWritable(0L), new Text("hdfs://localhost/" + DOCUMENTS_DIR + "/sample-statuses-20120906-141433.avro")); + + SolrInputDocument sid = new SolrInputDocument(); + sid.addField("id", "uniqueid1"); + sid.addField("user_name", "user1"); + sid.addField("text", "content of record one"); + 
SolrInputDocumentWritable sidw = new SolrInputDocumentWritable(sid); + + mapDriver + .withCacheArchive(solrHomeZip.getAbsolutePath()) + .withOutput(new Text("0"), sidw); + //mapDriver.runTest(); + List> result = mapDriver.run(); + for (Pair p: result) { + System.out.println(p.getFirst()); + System.out.println(p.getSecond()); + } + } +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java new file mode 100644 index 00000000000..dee44119243 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/MorphlineReducerTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.hadoop; + +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.TaskID; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import com.google.common.collect.Lists; + +public class MorphlineReducerTest extends MRUnitBase { + + public static class MySolrReducer extends SolrReducer { + Context context; + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + this.context = context; + + // handle a bug in MRUnit - should be fixed in MRUnit 1.0.0 + when(context.getTaskAttemptID()).thenAnswer(new Answer() { + @Override + public TaskAttemptID answer(final InvocationOnMock invocation) { + // FIXME MRUNIT seems to pass taskid to the reduce task as mapred.TaskID rather than mapreduce.TaskID + return new TaskAttemptID(new TaskID("000000000000", 0, true, 0), 0); + } + }); + + super.setup(context); + } + + } + + public static class NullInputFormat extends InputFormat { + @Override + public List getSplits(JobContext context) throws IOException, + InterruptedException { + return Lists.newArrayList(); + } + + @Override + public RecordReader createRecordReader(InputSplit split, + 
TaskAttemptContext context) throws IOException, InterruptedException { + return null; + } + + } + + @Test + public void testReducer() throws Exception { + MySolrReducer myReducer = new MySolrReducer(); + ReduceDriver reduceDriver = ReduceDriver.newReduceDriver(myReducer); + + Configuration config = reduceDriver.getConfiguration(); + setupHadoopConfig(config); + + List values = new ArrayList(); + SolrInputDocument sid = new SolrInputDocument(); + String id = "myid1"; + sid.addField("id", id); + sid.addField("text", "some unique text"); + SolrInputDocumentWritable sidw = new SolrInputDocumentWritable(sid); + values.add(sidw); + reduceDriver.withInput(new Text(id), values); + + reduceDriver.withCacheArchive(solrHomeZip.getAbsolutePath()); + + reduceDriver.withOutputFormat(SolrOutputFormat.class, NullInputFormat.class); + + reduceDriver.run(); + + assertEquals("Expected 1 counter increment", 1, reduceDriver.getCounters() + .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue()); + } + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/PathValidation.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/PathValidation.java new file mode 100644 index 00000000000..c76649edb71 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/PathValidation.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import java.util.regex.Pattern; + +import org.apache.hadoop.fs.Path; +import org.junit.Test; + +public class PathValidation extends MRUnitBase { + + @Test + public void testPath() { + Path path = new Path("hdfs://c2202.mycompany.com:8020/user/foo/bar.txt"); + assertEquals("/user/foo/bar.txt", path.toUri().getPath()); + assertEquals("bar.txt", path.getName()); + assertEquals("hdfs", path.toUri().getScheme()); + assertEquals("c2202.mycompany.com:8020", path.toUri().getAuthority()); + + path = new Path("/user/foo/bar.txt"); + assertEquals("/user/foo/bar.txt", path.toUri().getPath()); + assertEquals("bar.txt", path.getName()); + assertEquals(null, path.toUri().getScheme()); + assertEquals(null, path.toUri().getAuthority()); + + assertEquals("-", new Path("-").toString()); + } + + @Test + public void testRegex() { + Pattern regex = Pattern.compile("text/plain|text/html"); + assertTrue(regex.matcher("text/plain").matches()); + assertTrue(regex.matcher("text/html").matches()); + assertFalse(regex.matcher("xxtext/html").matches()); + } + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/UtilsForTests.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/UtilsForTests.java new file mode 100644 index 00000000000..b21d8d204e7 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/UtilsForTests.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.util.ExternalPaths; + + +public class UtilsForTests { + protected static final String RESOURCES_DIR = ExternalPaths.SOURCE_HOME + "/contrib/solr-mr/src/test-files"; + + public static void validateSolrServerDocumentCount(File solrHomeDir, FileSystem fs, Path outDir, int expectedDocs, int expectedShards) + throws IOException, SolrServerException { + + long actualDocs = 0; + int actualShards = 0; + for (FileStatus dir : fs.listStatus(outDir)) { // for each shard + if (dir.getPath().getName().startsWith("part") && dir.isDirectory()) { + actualShards++; + EmbeddedSolrServer solr = SolrRecordWriter.createEmbeddedSolrServer( + new Path(solrHomeDir.getAbsolutePath()), fs, dir.getPath()); + + try { + SolrQuery query = new SolrQuery(); + 
query.setQuery("*:*"); + QueryResponse resp = solr.query(query); + long numDocs = resp.getResults().getNumFound(); + actualDocs += numDocs; + } finally { + solr.shutdown(); + } + } + } + assertEquals(expectedShards, actualShards); + assertEquals(expectedDocs, actualDocs); + } + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java new file mode 100644 index 00000000000..be5ea01cd29 --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientCluster.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.hack; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; + +/* + * A simple interface for a client MR cluster used for testing. This interface + * provides basic methods which are independent of the underlying Mini Cluster ( + * either through MR1 or MR2). + */ +public interface MiniMRClientCluster { + + public void start() throws IOException; + + /** + * Stop and start back the cluster using the same configuration. 
+ */ + public void restart() throws IOException; + + public void stop() throws IOException; + + public Configuration getConfig() throws IOException; + +} diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java new file mode 100644 index 00000000000..2bf721b7a6c --- /dev/null +++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRClientClusterFactory.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.hadoop.hack; + +import java.io.File; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.JarFinder; + +/** + * A MiniMRCluster factory. In MR2, it provides a wrapper MiniMRClientCluster + * interface around the MiniMRYarnCluster. While in MR1, it provides such + * wrapper around MiniMRCluster. This factory should be used in tests to provide + * an easy migration of tests across MR1 and MR2. 
+ */
+public class MiniMRClientClusterFactory {
+
+  /**
+   * Creates and starts a cluster identified by the simple class name of
+   * {@code caller}; see the five-argument overload for details.
+   */
+  public static MiniMRClientCluster create(Class<?> caller, int noOfNMs,
+      Configuration conf, File testWorkDir) throws IOException {
+    return create(caller, caller.getSimpleName(), noOfNMs, conf, testWorkDir);
+  }
+
+  /**
+   * Copies the MR application jar and the jar containing {@code caller} into
+   * {@code <testWorkDir>/<identifier>-tmpDir} on the configured file system,
+   * adds both to the job classpath, then initializes and starts a
+   * {@link MiniMRYarnCluster}.
+   *
+   * @param caller class whose jar is made available to the cluster
+   * @param identifier name given to the cluster and used for its temp directory
+   * @param noOfNMs number of node managers to start
+   * @param conf base configuration; a new {@link Configuration} is used if null
+   * @param testWorkDir working directory handed to the cluster
+   * @return an adapter around the started cluster
+   * @throws IOException if a jar cannot be copied or the job cannot be created
+   */
+  public static MiniMRClientCluster create(Class<?> caller, String identifier,
+      int noOfNMs, Configuration conf, File testWorkDir) throws IOException {
+
+    if (conf == null) {
+      conf = new Configuration();
+    }
+
+    FileSystem fs = FileSystem.get(conf);
+
+    Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir")
+        .makeQualified(fs);
+    Path appJar = new Path(testRootDir, "MRAppJar.jar");
+
+    // Copy MRAppJar and make it private.
+    Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);
+
+    fs.copyFromLocalFile(appMasterJar, appJar);
+    fs.setPermission(appJar, new FsPermission("744"));
+
+    Job job = Job.getInstance(conf);
+
+    job.addFileToClassPath(appJar);
+
+    // Ship the jar that contains the caller class the same way.
+    Path callerJar = new Path(JarFinder.getJar(caller));
+    Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
+    fs.copyFromLocalFile(callerJar, remoteCallerJar);
+    fs.setPermission(remoteCallerJar, new FsPermission("744"));
+    job.addFileToClassPath(remoteCallerJar);
+
+    MiniMRYarnCluster miniMRYarnCluster;
+    try {
+      miniMRYarnCluster = new MiniMRYarnCluster(identifier,
+          noOfNMs, testWorkDir);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+    // MiniMRYarnClusterAdapter.restart() reads these two keys back in order to
+    // rebuild an equivalent cluster.
+    Configuration clusterConf = job.getConfiguration();
+    clusterConf.set("minimrclientcluster.caller.name", identifier);
+    clusterConf.setInt("minimrclientcluster.nodemanagers.number", noOfNMs);
+    miniMRYarnCluster.init(clusterConf);
+    miniMRYarnCluster.start();
+
+    return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
new file mode 100644
index 00000000000..b399b7a9552
--- /dev/null
+++
b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRCluster.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.JobPriority;
+import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.lucene.util.LuceneTestCase;
+
+
+/**
+ * This class is an MR2 replacement for older MR1 MiniMRCluster, that was used
+ * by tests prior to MR2. This replacement class uses the new MiniMRYarnCluster
+ * in MR2 but provides the same old MR1 interface, so tests can be migrated from
+ * MR1 to MR2 with minimal changes.
+ *
+ * Due to major differences between MR1 and MR2, a number of methods are either
+ * unimplemented/unsupported or were re-implemented to provide wrappers around
+ * MR2 functionality.
+ *
+ * @deprecated Use {@link MiniMRClientClusterFactory} instead
+ */
+@Deprecated
+public class MiniMRCluster {
+  private static final Log LOG = LogFactory.getLog(MiniMRCluster.class);
+
+  private MiniMRClientCluster mrClientCluster;
+
+  public String getTaskTrackerLocalDir(int taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  public String[] getTaskTrackerLocalDirs(int taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  class JobTrackerRunner {
+    // Mock class
+  }
+
+  class TaskTrackerRunner {
+    // Mock class
+  }
+
+  public JobTrackerRunner getJobTrackerRunner() {
+    throw new UnsupportedOperationException();
+  }
+
+  TaskTrackerRunner getTaskTrackerRunner(int id) {
+    throw new UnsupportedOperationException();
+  }
+
+  public int getNumTaskTrackers() {
+    throw new UnsupportedOperationException();
+  }
+
+  public void setInlineCleanupThreads() {
+    throw new UnsupportedOperationException();
+  }
+
+  public void waitUntilIdle() {
+    throw new UnsupportedOperationException();
+  }
+
+  private void waitTaskTrackers() {
+    throw new UnsupportedOperationException();
+  }
+
+  public int getJobTrackerPort() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Returns a new JobConf built from the configuration of the wrapped cluster,
+   * or null (after logging the failure) if that configuration cannot be read.
+   */
+  public JobConf createJobConf() {
+    return newJobConfFromCluster();
+  }
+
+  /**
+   * Same as {@link #createJobConf()}; the {@code conf} argument is not used.
+   */
+  public JobConf createJobConf(JobConf conf) {
+    return newJobConfFromCluster();
+  }
+
+  /**
+   * Builds a JobConf from the configuration of the wrapped cluster. Returns
+   * null, after logging the failure, if that configuration cannot be obtained.
+   */
+  private JobConf newJobConfFromCluster() {
+    JobConf jobConf = null;
+    try {
+      jobConf = new JobConf(mrClientCluster.getConfig());
+    } catch (IOException e) {
+      // Log with the throwable so that the stack trace is not lost.
+      LOG.error("Could not get the configuration of the mini MR cluster", e);
+    }
+    return jobConf;
+  }
+
+  static JobConf configureJobConf(JobConf conf, String namenode,
+      int jobTrackerPort, int jobTrackerInfoPort, UserGroupInformation ugi) {
+    throw new UnsupportedOperationException();
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
+      String[] racks, String[] hosts) throws Exception {
+    this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts);
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir,
+      String[] racks, String[] hosts, JobConf conf) throws Exception {
+    this(0, 0, numTaskTrackers, namenode, numDir, racks, hosts, null, conf);
+  }
+
+  public MiniMRCluster(int numTaskTrackers, String namenode, int numDir)
+      throws Exception {
+    this(0, 0, numTaskTrackers, namenode, numDir);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks)
+      throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, null);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf)
+      throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, conf, 0);
+  }
+
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf,
+      int numTrackerToExclude) throws Exception {
+    this(jobTrackerPort, taskTrackerPort, numTaskTrackers, namenode, numDir,
+        racks, hosts, ugi, conf, numTrackerToExclude, new Clock());
+  }
+
+  /**
+   * Starts the MR2 mini cluster behind this MR1-style facade. Only
+   * {@code numTaskTrackers}, {@code namenode} and {@code conf} are used; the
+   * remaining arguments exist for signature compatibility and are ignored.
+   *
+   * @param conf job configuration (a new JobConf is used if null); it must
+   *          define {@code testWorkDir}, the directory the cluster works in
+   */
+  public MiniMRCluster(int jobTrackerPort, int taskTrackerPort,
+      int numTaskTrackers, String namenode, int numDir, String[] racks,
+      String[] hosts, UserGroupInformation ugi, JobConf conf,
+      int numTrackerToExclude, Clock clock) throws Exception {
+    if (conf == null) conf = new JobConf();
+    FileSystem.setDefaultUri(conf, namenode);
+    String identifier = this.getClass().getSimpleName() + "_"
+        + Integer.toString(LuceneTestCase.random().nextInt(Integer.MAX_VALUE));
+    mrClientCluster = MiniMRClientClusterFactory.create(this.getClass(),
+        identifier, numTaskTrackers, conf, new File(conf.get("testWorkDir")));
+  }
+
+  public UserGroupInformation getUgi() {
+    throw new UnsupportedOperationException();
+  }
+
+  public TaskCompletionEvent[] getTaskCompletionEvents(JobID id, int from,
+      int max) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public void setJobPriority(JobID jobId, JobPriority priority)
+      throws AccessControlException, IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public JobPriority getJobPriority(JobID jobId) {
+    throw new UnsupportedOperationException();
+  }
+
+  public long getJobFinishTime(JobID jobId) {
+    throw new UnsupportedOperationException();
+  }
+
+  public void initializeJob(JobID jobId) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  public MapTaskCompletionEventsUpdate getMapTaskCompletionEventsUpdates(
+      int index, JobID jobId, int max) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Same as {@link #createJobConf()}. */
+  public JobConf getJobTrackerConf() {
+    return newJobConfFromCluster();
+  }
+
+  public int getFaultCount(String hostName) {
+    throw new UnsupportedOperationException();
+  }
+
+  public void startJobTracker() {
+    // Do nothing
+  }
+
+  public void startJobTracker(boolean wait) {
+    // Do nothing
+  }
+
+  public void stopJobTracker() {
+    // Do nothing
+  }
+
+  public void stopTaskTracker(int id) {
+    // Do nothing
+  }
+
+  public void startTaskTracker(String host, String rack, int idx, int numDir)
+      throws IOException {
+    // Do nothing
+  }
+
+  void addTaskTracker(TaskTrackerRunner taskTracker) {
+    throw new UnsupportedOperationException();
+  }
+
+  int getTaskTrackerID(String trackerName) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Stops the wrapped cluster; an IOException is logged, not rethrown. */
+  public void shutdown() {
+    try {
+      mrClientCluster.stop();
+    } catch (IOException e) {
+      LOG.error("Could not stop the mini MR cluster", e);
+    }
+  }
+
+  static class Clock {
+    long getTime() {
+      return System.currentTimeMillis();
+    }
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
new file mode 100644
index 00000000000..8fa1b3132bc
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnCluster.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Locale;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.LocalContainerLauncher;
+import org.apache.hadoop.mapred.ShuffleHandler;
+import org.apache.hadoop.mapreduce.MRConfig;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.util.JarFinder;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
+
+/**
+ * Configures and starts the MR-specific components in the YARN cluster: on top
+ * of {@link MiniYARNCluster} it registers a service that runs a
+ * {@link JobHistoryServer}, and it prepares the MR staging and history
+ * directories and the shuffle auxiliary service during initialization.
+ */
+public class MiniMRYarnCluster extends MiniYARNCluster {
+
+  /** Location of the jar that contains {@link LocalContainerLauncher}. */
+  public static final String APPJAR = JarFinder.getJar(LocalContainerLauncher.class);
+
+  private static final Log LOG = LogFactory.getLog(MiniMRYarnCluster.class);
+  private JobHistoryServer historyServer;
+  private JobHistoryServerWrapper historyServerWrapper;
+
+  /** Creates a cluster with a single node manager. */
+  public MiniMRYarnCluster(String testName, File testWorkDir) {
+    this(testName, 1, testWorkDir);
+  }
+
+  /**
+   * @param testName name of the test, used as the name of the cluster
+   * @param noOfNMs the number of node managers in the cluster
+   * @param testWorkDir directory below which the cluster keeps its files
+   */
+  public MiniMRYarnCluster(String testName, int noOfNMs, File testWorkDir) {
+    // 4 nm-local-dirs and 4 nm-log-dirs per node manager
+    super(testName, noOfNMs, 4, 4, testWorkDir);
+    //TODO: add the history server
+    historyServerWrapper = new JobHistoryServerWrapper();
+    addService(historyServerWrapper);
+  }
+
+  /**
+   * Applies the MR-specific settings to {@code conf} (framework name, staging
+   * directory, umask, shuffle service, container executor, uber mode off),
+   * recreates the staging directory, creates the history done directory, and
+   * then delegates to the superclass.
+   *
+   * @throws YarnRuntimeException if the staging or done directory cannot be
+   *           prepared
+   */
+  @Override
+  public void serviceInit(Configuration conf) throws Exception {
+    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
+    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
+      conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(),
+          "apps_staging_dir/").getAbsolutePath());
+    }
+
+    // By default, VMEM monitoring disabled, PMEM monitoring enabled.
+    if (!conf.getBoolean(
+        MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING,
+        MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) {
+      conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
+      conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
+    }
+
+    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000");
+
+    try {
+      Path stagingPath = FileContext.getFileContext(conf).makeQualified(
+          new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR)));
+      /*
+       * Re-configure the staging path on Windows if the file system is localFs.
+       * We need to use an absolute path that contains the drive letter. The unit
+       * test could run on a different drive than the AM. We can run into the
+       * issue that job files are localized to the drive where the test runs on,
+       * while the AM starts on a different drive and fails to find the job
+       * metafiles. Using absolute path can avoid this ambiguity.
+       */
+      if (Path.WINDOWS) {
+        if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) {
+          conf.set(MRJobConfig.MR_AM_STAGING_DIR,
+              new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR))
+                  .getAbsolutePath());
+        }
+      }
+      FileContext fc=FileContext.getFileContext(stagingPath.toUri(), conf);
+      if (fc.util().exists(stagingPath)) {
+        LOG.info(stagingPath + " exists! deleting...");
+        fc.delete(stagingPath, true);
+      }
+      LOG.info("mkdir: " + stagingPath);
+      //mkdir the staging directory so that right permissions are set while running as proxy user
+      fc.mkdir(stagingPath, null, true);
+      //mkdir done directory as well
+      String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf);
+      Path doneDirPath = fc.makeQualified(new Path(doneDir));
+      fc.mkdir(doneDirPath, null, true);
+    } catch (IOException e) {
+      throw new YarnRuntimeException("Could not create staging directory. ", e);
+    }
+    conf.set(MRConfig.MASTER_ADDRESS, "test"); // The default is local because of
+                                               // which shuffle doesn't happen
+    //configure the shuffle service in NM
+    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
+        new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID });
+    conf.setClass(String.format(Locale.ENGLISH, YarnConfiguration.NM_AUX_SERVICE_FMT,
+        ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class,
+        Service.class);
+
+    // Non-standard shuffle port
+    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
+
+    conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR,
+        DefaultContainerExecutor.class, ContainerExecutor.class);
+
+    // TestMRJobs is for testing non-uberized operation only; see TestUberAM
+    // for corresponding uberized tests.
+    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
+
+    super.serviceInit(conf);
+  }
+
+  /**
+   * Service that creates the {@link JobHistoryServer} when it is started and
+   * stops it again when it is stopped.
+   */
+  private class JobHistoryServerWrapper extends AbstractService {
+    public JobHistoryServerWrapper() {
+      super(JobHistoryServerWrapper.class.getName());
+    }
+
+    /**
+     * Starts the history server on a separate thread and waits for it to leave
+     * the INITED state, then copies the addresses it ended up with back into
+     * this service's configuration.
+     *
+     * @throws YarnRuntimeException wrapping any failure, including the history
+     *           server not reaching the STARTED state
+     */
+    @Override
+    public synchronized void serviceStart() throws Exception {
+      try {
+        if (!getConfig().getBoolean(
+            JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS,
+            JHAdminConfig.DEFAULT_MR_HISTORY_MINICLUSTER_FIXED_PORTS)) {
+          // pick free random ports.
+          getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
+              MiniYARNCluster.getHostname() + ":0");
+          getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
+              MiniYARNCluster.getHostname() + ":0");
+        }
+        historyServer = new JobHistoryServer();
+        historyServer.init(getConfig());
+        new Thread() {
+          @Override
+          public void run() {
+            historyServer.start();
+          }
+        }.start();
+        // Bounded wait (at most 60 polls), like the RM and NM wrappers in
+        // MiniYARNCluster, so a history server stuck in INITED cannot hang
+        // the test forever.
+        int waitCount = 0;
+        while (historyServer.getServiceState() == STATE.INITED
+            && waitCount++ < 60) {
+          LOG.info("Waiting for HistoryServer to start...");
+          Thread.sleep(1500);
+        }
+        if (historyServer.getServiceState() != STATE.STARTED) {
+          throw new IOException("HistoryServer failed to start. Final state is "
+              + historyServer.getServiceState());
+        }
+        super.serviceStart();
+      } catch (Throwable t) {
+        throw new YarnRuntimeException(t);
+      }
+      //need to do this because historyServer.init creates a new Configuration
+      getConfig().set(JHAdminConfig.MR_HISTORY_ADDRESS,
+          historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
+      getConfig().set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS,
+          historyServer.getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
+
+      LOG.info("MiniMRYARN ResourceManager address: " +
+          getConfig().get(YarnConfiguration.RM_ADDRESS));
+      LOG.info("MiniMRYARN ResourceManager web address: " +
+          getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS));
+      LOG.info("MiniMRYARN HistoryServer address: " +
+          getConfig().get(JHAdminConfig.MR_HISTORY_ADDRESS));
+      LOG.info("MiniMRYARN HistoryServer web address: " +
+          getConfig().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS));
+    }
+
+    /** Stops the history server, if one was created, then this service. */
+    @Override
+    public synchronized void serviceStop() throws Exception {
+      if (historyServer != null) {
+        historyServer.stop();
+      }
+      super.serviceStop();
+    }
+  }
+
+  /** Returns the history server, or null if the wrapper was never started. */
+  public JobHistoryServer getHistoryServer() {
+    return this.historyServer;
+  }
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
new file mode 100644
index 00000000000..08ab881005b
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniMRYarnClusterAdapter.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.hadoop.hack;
+
+import java.io.File;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
+import org.apache.hadoop.service.Service.STATE;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+
+/**
+ * Exposes a {@link MiniMRYarnCluster} through the {@link MiniMRClientCluster}
+ * interface, so that tests can be written against the interface alone.
+ */
+public class MiniMRYarnClusterAdapter implements MiniMRClientCluster {
+
+  private static final Log LOG = LogFactory.getLog(MiniMRYarnClusterAdapter.class);
+
+  /** Work directory passed to the replacement cluster built by restart(). */
+  private File testWorkDir;
+
+  /** The wrapped cluster; restart() swaps in a newly created instance. */
+  private MiniMRYarnCluster miniMRYarnCluster;
+
+  public MiniMRYarnClusterAdapter(MiniMRYarnCluster miniMRYarnCluster, File testWorkDir) {
+    this.testWorkDir = testWorkDir;
+    this.miniMRYarnCluster = miniMRYarnCluster;
+  }
+
+  /** Returns the configuration of the wrapped cluster. */
+  @Override
+  public Configuration getConfig() {
+    return miniMRYarnCluster.getConfig();
+  }
+
+  /** Starts the wrapped cluster. */
+  @Override
+  public void start() {
+    miniMRYarnCluster.start();
+  }
+
+  /** Stops the wrapped cluster. */
+  @Override
+  public void stop() {
+    miniMRYarnCluster.stop();
+  }
+
+  /**
+   * Stops the wrapped cluster and replaces it with a new one initialized from
+   * a copy of the current configuration. Only logs a warning and returns if
+   * the cluster is not in the STARTED state.
+   */
+  @Override
+  public void restart() {
+    boolean running = miniMRYarnCluster.getServiceState().equals(STATE.STARTED);
+    if (!running) {
+      LOG.warn("Cannot restart the mini cluster, start it first");
+      return;
+    }
+
+    // Copy the live configuration before stopping; the fixed-ports flags make
+    // the new cluster keep the configured addresses instead of picking new
+    // random ports.
+    Configuration restartConf = new Configuration(getConfig());
+    String clusterName = restartConf.get("minimrclientcluster.caller.name",
+        this.getClass().getName());
+    int nodeManagerCount =
+        restartConf.getInt("minimrclientcluster.nodemanagers.number", 1);
+    restartConf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
+    restartConf.setBoolean(JHAdminConfig.MR_HISTORY_MINICLUSTER_FIXED_PORTS, true);
+
+    stop();
+    miniMRYarnCluster = new MiniMRYarnCluster(clusterName, nodeManagerCount, testWorkDir);
+    miniMRYarnCluster.init(restartConf);
+    miniMRYarnCluster.start();
+  }
+
+}
diff --git a/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
new file mode 100644
index 00000000000..d02726657e4
--- /dev/null
+++ b/solr/contrib/solr-mr/src/test/org/apache/solr/hadoop/hack/MiniYARNCluster.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.hadoop.hack; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Locale; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.server.api.ResourceTracker; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService; + +public class MiniYARNCluster extends CompositeService 
{ + + private static final Log LOG = LogFactory.getLog(MiniYARNCluster.class); + + // temp fix until metrics system can auto-detect itself running in unit test: + static { + DefaultMetricsSystem.setMiniClusterMode(true); + } + + private NodeManager[] nodeManagers; + private ResourceManager resourceManager; + + private ResourceManagerWrapper resourceManagerWrapper; + + private File testWorkDir; + + // Number of nm-local-dirs per nodemanager + private int numLocalDirs; + // Number of nm-log-dirs per nodemanager + private int numLogDirs; + + /** + * @param testName name of the test + * @param noOfNodeManagers the number of node managers in the cluster + * @param numLocalDirs the number of nm-local-dirs per nodemanager + * @param numLogDirs the number of nm-log-dirs per nodemanager + */ + public MiniYARNCluster(String testName, int noOfNodeManagers, + int numLocalDirs, int numLogDirs, File testWorkDir) { + super(testName.replace("$", "")); + this.numLocalDirs = numLocalDirs; + this.numLogDirs = numLogDirs; + String testSubDir = testName.replace("$", ""); + File targetWorkDir = new File(testWorkDir, testSubDir); + try { + FileContext.getLocalFSFileContext().delete( + new Path(targetWorkDir.getAbsolutePath()), true); + } catch (Exception e) { + LOG.warn("COULD NOT CLEANUP", e); + throw new YarnRuntimeException("could not cleanup test dir: "+ e, e); + } + + if (Shell.WINDOWS) { + // The test working directory can exceed the maximum path length supported + // by some Windows APIs and cmd.exe (260 characters). To work around this, + // create a symlink in temporary storage with a much shorter path, + // targeting the full path to the test working directory. Then, use the + // symlink as the test working directory. 
+ String targetPath = targetWorkDir.getAbsolutePath(); + File link = new File(System.getProperty("java.io.tmpdir"), + String.valueOf(System.currentTimeMillis())); + String linkPath = link.getAbsolutePath(); + + try { + FileContext.getLocalFSFileContext().delete(new Path(linkPath), true); + } catch (IOException e) { + throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e); + } + + // Guarantee target exists before creating symlink. + targetWorkDir.mkdirs(); + + ShellCommandExecutor shexec = new ShellCommandExecutor( + Shell.getSymlinkCommand(targetPath, linkPath)); + try { + shexec.execute(); + } catch (IOException e) { + throw new YarnRuntimeException(String.format(Locale.ENGLISH, + "failed to create symlink from %s to %s, shell output: %s", linkPath, + targetPath, shexec.getOutput()), e); + } + + this.testWorkDir = link; + } else { + this.testWorkDir = targetWorkDir; + } + + resourceManagerWrapper = new ResourceManagerWrapper(); + addService(resourceManagerWrapper); + nodeManagers = new CustomNodeManager[noOfNodeManagers]; + for(int index = 0; index < noOfNodeManagers; index++) { + addService(new NodeManagerWrapper(index)); + nodeManagers[index] = new CustomNodeManager(); + } + } + + @Override + public void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf instanceof YarnConfiguration ? 
conf + : new YarnConfiguration( + conf)); + } + + public File getTestWorkDir() { + return testWorkDir; + } + + public ResourceManager getResourceManager() { + return this.resourceManager; + } + + public NodeManager getNodeManager(int i) { + return this.nodeManagers[i]; + } + + public static String getHostname() { + try { + return InetAddress.getLocalHost().getHostName(); + } + catch (UnknownHostException ex) { + throw new RuntimeException(ex); + } + } + + private class ResourceManagerWrapper extends AbstractService { + public ResourceManagerWrapper() { + super(ResourceManagerWrapper.class.getName()); + } + + @Override + public synchronized void serviceStart() throws Exception { + try { + getConfig().setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true); + if (!getConfig().getBoolean( + YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, + YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) { + // pick free random ports. + String hostname = MiniYARNCluster.getHostname(); + getConfig().set(YarnConfiguration.RM_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_ADMIN_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_SCHEDULER_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, + hostname + ":0"); + getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS, + hostname + ":0"); + } + resourceManager = new ResourceManager() { + @Override + protected void doSecureLogin() throws IOException { + // Don't try to login using keytab in the testcase. + }; + }; + resourceManager.init(getConfig()); + new Thread() { + public void run() { + resourceManager.start(); + }; + }.start(); + int waitCount = 0; + while (resourceManager.getServiceState() == STATE.INITED + && waitCount++ < 60) { + LOG.info("Waiting for RM to start..."); + Thread.sleep(1500); + } + if (resourceManager.getServiceState() != STATE.STARTED) { + // RM could have failed. + throw new IOException( + "ResourceManager failed to start. 
Final state is " + + resourceManager.getServiceState()); + } + super.serviceStart(); + } catch (Throwable t) { + throw new YarnRuntimeException(t); + } + LOG.info("MiniYARN ResourceManager address: " + + getConfig().get(YarnConfiguration.RM_ADDRESS)); + LOG.info("MiniYARN ResourceManager web address: " + + getConfig().get(YarnConfiguration.RM_WEBAPP_ADDRESS)); + } + + @Override + public synchronized void serviceStop() throws Exception { + if (resourceManager != null) { + resourceManager.stop(); + } + super.serviceStop(); + + if (Shell.WINDOWS) { + // On Windows, clean up the short temporary symlink that was created to + // work around path length limitation. + String testWorkDirPath = testWorkDir.getAbsolutePath(); + try { + FileContext.getLocalFSFileContext().delete(new Path(testWorkDirPath), + true); + } catch (IOException e) { + LOG.warn("could not cleanup symlink: " + + testWorkDir.getAbsolutePath()); + } + } + } + } + + private class NodeManagerWrapper extends AbstractService { + int index = 0; + + public NodeManagerWrapper(int i) { + super(NodeManagerWrapper.class.getName() + "_" + i); + index = i; + } + + public synchronized void serviceInit(Configuration conf) throws Exception { + Configuration config = new YarnConfiguration(conf); + super.serviceInit(config); + } + + /** + * Create local/log directories + * @param dirType type of directories i.e. local dirs or log dirs + * @param numDirs number of directories + * @return the created directories as a comma delimited String + */ + private String prepareDirs(String dirType, int numDirs) { + File []dirs = new File[numDirs]; + String dirsString = ""; + for (int i = 0; i < numDirs; i++) { + dirs[i]= new File(testWorkDir, MiniYARNCluster.this.getName() + + "-" + dirType + "Dir-nm-" + index + "_" + i); + dirs[i].mkdirs(); + LOG.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath()); + String delimiter = (i > 0) ? 
"," : ""; + dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath()); + } + return dirsString; + } + + public synchronized void serviceStart() throws Exception { + try { + // create nm-local-dirs and configure them for the nodemanager + String localDirsString = prepareDirs("local", numLocalDirs); + getConfig().set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString); + // create nm-log-dirs and configure them for the nodemanager + String logDirsString = prepareDirs("log", numLogDirs); + getConfig().set(YarnConfiguration.NM_LOG_DIRS, logDirsString); + + File remoteLogDir = + new File(testWorkDir, MiniYARNCluster.this.getName() + + "-remoteLogDir-nm-" + index); + remoteLogDir.mkdir(); + getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + remoteLogDir.getAbsolutePath()); + // By default AM + 2 containers + getConfig().setInt(YarnConfiguration.NM_PMEM_MB, 4*1024); + getConfig().set(YarnConfiguration.NM_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + getConfig().set(YarnConfiguration.NM_LOCALIZER_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + getConfig().set(YarnConfiguration.NM_WEBAPP_ADDRESS, + MiniYARNCluster.getHostname() + ":0"); + + // Disable resource checks by default + if (!getConfig().getBoolean( + YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING, + YarnConfiguration. + DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) { + getConfig().setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + getConfig().setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + } + + LOG.info("Starting NM: " + index); + nodeManagers[index].init(getConfig()); + new Thread() { + public void run() { + nodeManagers[index].start(); + }; + }.start(); + int waitCount = 0; + while (nodeManagers[index].getServiceState() == STATE.INITED + && waitCount++ < 60) { + LOG.info("Waiting for NM " + index + " to start..."); + Thread.sleep(1000); + } + if (nodeManagers[index].getServiceState() != STATE.STARTED) { + // NM could have failed. 
+ throw new IOException("NodeManager " + index + " failed to start"); + } + super.serviceStart(); + } catch (Throwable t) { + throw new YarnRuntimeException(t); + } + } + + @Override + public synchronized void serviceStop() throws Exception { + if (nodeManagers[index] != null) { + nodeManagers[index].stop(); + } + super.serviceStop(); + } + } + + private class CustomNodeManager extends NodeManager { + @Override + protected void doSecureLogin() throws IOException { + // Don't try to login using keytab in the testcase. + }; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new NodeStatusUpdaterImpl(context, dispatcher, + healthChecker, metrics) { + @Override + protected ResourceTracker getRMClient() { + final ResourceTrackerService rt = resourceManager + .getResourceTrackerService(); + final RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + + // For in-process communication without RPC + return new ResourceTracker() { + + @Override + public NodeHeartbeatResponse nodeHeartbeat( + NodeHeartbeatRequest request) throws YarnException, + IOException { + NodeHeartbeatResponse response = recordFactory.newRecordInstance( + NodeHeartbeatResponse.class); + try { + response = rt.nodeHeartbeat(request); + } catch (YarnException e) { + LOG.info("Exception in heartbeat from node " + + request.getNodeStatus().getNodeId(), e); + throw e; + } + return response; + } + + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) + throws YarnException, IOException { + RegisterNodeManagerResponse response = recordFactory. 
+ newRecordInstance(RegisterNodeManagerResponse.class); + try { + response = rt.registerNodeManager(request); + } catch (YarnException e) { + LOG.info("Exception in node registration from " + + request.getNodeId().toString(), e); + throw e; + } + return response; + } + }; + }; + + @Override + protected void stopRMProxy() { + return; + } + }; + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java index 8e9fcb0f2e9..af8c0973a4b 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/HdfsDirectory.java @@ -54,7 +54,6 @@ public class HdfsDirectory extends BaseDirectory { public HdfsDirectory(Path hdfsDirPath, Configuration configuration) throws IOException { - assert hdfsDirPath.toString().startsWith("hdfs:/") : hdfsDirPath.toString(); setLockFactory(NoLockFactory.getNoLockFactory()); this.hdfsDirPath = hdfsDirPath; this.configuration = configuration; diff --git a/solr/core/src/test-files/log4j.properties b/solr/core/src/test-files/log4j.properties index 4562e3fd80a..08a32f3f38f 100644 --- a/solr/core/src/test-files/log4j.properties +++ b/solr/core/src/test-files/log4j.properties @@ -8,6 +8,7 @@ log4j.appender.CONSOLE.layout.ConversionPattern=%-5p - %d{yyyy-MM-dd HH:mm:ss.SS log4j.logger.org.apache.zookeeper=WARN log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.solr.hadoop=INFO #log4j.logger.org.apache.solr.update.processor.LogUpdateProcessor=DEBUG #log4j.logger.org.apache.solr.update.processor.DistributedUpdateProcessor=DEBUG diff --git a/solr/example/cloud-scripts/log4j.properties b/solr/example/scripts/cloud-scripts/log4j.properties similarity index 100% rename from solr/example/cloud-scripts/log4j.properties rename to solr/example/scripts/cloud-scripts/log4j.properties diff --git a/solr/example/cloud-scripts/zkcli.bat b/solr/example/scripts/cloud-scripts/zkcli.bat similarity index 
67% rename from solr/example/cloud-scripts/zkcli.bat rename to solr/example/scripts/cloud-scripts/zkcli.bat index 8232a726cac..ac092e01874 100644 --- a/solr/example/cloud-scripts/zkcli.bat +++ b/solr/example/scripts/cloud-scripts/zkcli.bat @@ -8,4 +8,4 @@ REM Find location of this script set SDIR=%~dp0 if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1% -"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %* +"%JVM%" -Dlog4j.configuration=file:%SDIR%\log4j.properties -classpath "%SDIR%\..\..\solr-webapp\webapp\WEB-INF\lib\*;%SDIR%\..\..\lib\ext\*" org.apache.solr.cloud.ZkCLI %* diff --git a/solr/example/cloud-scripts/zkcli.sh b/solr/example/scripts/cloud-scripts/zkcli.sh similarity index 62% rename from solr/example/cloud-scripts/zkcli.sh rename to solr/example/scripts/cloud-scripts/zkcli.sh index ab5da966fa5..15b5392d2e5 100644 --- a/solr/example/cloud-scripts/zkcli.sh +++ b/solr/example/scripts/cloud-scripts/zkcli.sh @@ -9,5 +9,5 @@ JVM="java" sdir="`dirname \"$0\"`" -PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"} +PATH=$JAVA_HOME/bin:$PATH $JVM -Dlog4j.configuration=file:$sdir/log4j.properties -classpath "$sdir/../../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../../lib/ext/*" org.apache.solr.cloud.ZkCLI ${1+"$@"} diff --git a/solr/example/scripts/solr-mr/solr-mr.bat b/solr/example/scripts/solr-mr/solr-mr.bat new file mode 100644 index 00000000000..89f934628bd --- /dev/null +++ b/solr/example/scripts/solr-mr/solr-mr.bat @@ -0,0 +1,9 @@ + +set JVM=java + +REM Find location of this script + +set SDIR=%~dp0 +if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1% + +"%JVM%" -classpath 
"%SDIR%\..\..\..\dist\*;%SDIR%\..\..\..\contrib\solr-mr\lib\*;%SDIR%\..\..\..\contrib\solr-morphlines-core\lib\*;%SDIR%\..\..\..\contrib\solr-morphlines-cell\lib\*;%SDIR%\..\..\..\contrib\extraction\lib\*;%SDIR%\..\..\solr-webapp\solr\WEB-INF\lib\*;%SDIR%\..\..\lib\ext\*" org.apache.solr.hadoop.MapReduceIndexerTool %* diff --git a/solr/example/scripts/solr-mr/solr-mr.sh b/solr/example/scripts/solr-mr/solr-mr.sh new file mode 100644 index 00000000000..f48eaae336b --- /dev/null +++ b/solr/example/scripts/solr-mr/solr-mr.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +JVM="java" + +# Find location of this script + +sdir="`dirname \"$0\"`" + +PATH=$JAVA_HOME/bin:$PATH $JVM -cp "$sdir/../../../dist/*:$sdir/../../../contrib/solr-mr/lib/*:$sdir/../../../contrib/solr-morphlines-core/lib/*:$sdir/../../../contrib/solr-morphlines-cell/lib/*:$sdir/../../../contrib/extraction/lib/*:$sdir/../../solr-webapp/solr/WEB-INF/lib/*:$sdir/../../lib/ext/*" org.apache.solr.hadoop.MapReduceIndexerTool ${1+"$@"} + diff --git a/solr/licenses/Saxon-HE-9.5.1-2.jar.sha1 b/solr/licenses/Saxon-HE-9.5.1-2.jar.sha1 new file mode 100644 index 00000000000..2d919372410 --- /dev/null +++ b/solr/licenses/Saxon-HE-9.5.1-2.jar.sha1 @@ -0,0 +1 @@ +02a76558799673a46d88914e11b247dcf80ba92c diff --git a/solr/licenses/Saxon-HE-LICENSE-MPL.txt b/solr/licenses/Saxon-HE-LICENSE-MPL.txt new file mode 100644 index 00000000000..608cd2ef524 --- /dev/null +++ b/solr/licenses/Saxon-HE-LICENSE-MPL.txt @@ -0,0 +1,108 @@ +MOZILLA PUBLIC LICENSE +Version 1.0 + +1. Definitions. + +1.1. ``Contributor'' means each entity that creates or contributes to the creation of Modifications. +1.2. ``Contributor Version'' means the combination of the Original Code, prior Modifications used by a Contributor, and the Modifications made by that particular Contributor. + +1.3. ``Covered Code'' means the Original Code or Modifications or the combination of the Original Code and Modifications, in each case including portions thereof. + +1.4. 
``Electronic Distribution Mechanism'' means a mechanism generally accepted in the software development community for the electronic transfer of data. + +1.5. ``Executable'' means Covered Code in any form other than Source Code. + +1.6. ``Initial Developer'' means the individual or entity identified as the Initial Developer in the Source Code notice required by Exhibit A. + +1.7. ``Larger Work'' means a work which combines Covered Code or portions thereof with code not governed by the terms of this License. + +1.8. ``License'' means this document. + +1.9. ``Modifications'' means any addition to or deletion from the substance or structure of either the Original Code or any previous Modifications. When Covered Code is released as a series of files, a Modification is: + +A. Any addition to or deletion from the contents of a file containing Original Code or previous Modifications. + +B. Any new file that contains any part of the Original Code or previous Modifications. + +1.10. ``Original Code'' means Source Code of computer software code which is described in the Source Code notice required by Exhibit A as Original Code, and which, at the time of its release under this License is not already Covered Code governed by this License. + +1.11. ``Source Code'' means the preferred form of the Covered Code for making modifications to it, including all modules it contains, plus any associated interface definition files, scripts used to control compilation and installation of an Executable, or a list of source code differential comparisons against either the Original Code or another well known, available Covered Code of the Contributor's choice. The Source Code can be in a compressed or archival form, provided the appropriate decompression or de-archiving software is widely available for no charge. + +1.12. 
``You'' means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 6.1. For legal entities, ``You'' includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, ``control'' means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of fifty percent (50%) or more of the outstanding shares or beneficial ownership of such entity. + +2. Source Code License. +2.1. The Initial Developer Grant. +The Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims: +(a) to use, reproduce, modify, display, perform, sublicense and distribute the Original Code (or portions thereof) with or without Modifications, or as part of a Larger Work; and + +(b) under patents now or hereafter owned or controlled by Initial Developer, to make, have made, use and sell (``Utilize'') the Original Code (or portions thereof), but solely to the extent that any such patent is reasonably necessary to enable You to Utilize the Original Code (or portions thereof) and not to any greater extent that may be necessary to Utilize further Modifications or combinations. + +2.2. Contributor Grant. 
+Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims: + +(a) to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof) either on an unmodified basis, with other Modifications, as Covered Code or as part of a Larger Work; and + +(b) under patents now or hereafter owned or controlled by Contributor, to Utilize the Contributor Version (or portions thereof), but solely to the extent that any such patent is reasonably necessary to enable You to Utilize the Contributor Version (or portions thereof), and not to any greater extent that may be necessary to Utilize further Modifications or combinations. + +3. Distribution Obligations. +3.1. Application of License. +The Modifications which You create or to which You contribute are governed by the terms of this License, including without limitation Section 2.2. The Source Code version of Covered Code may be distributed only under the terms of this License or a future version of this License released under Section 6.1, and You must include a copy of this License with every copy of the Source Code You distribute. You may not offer or impose any terms on any Source Code version that alters or restricts the applicable version of this License or the recipients' rights hereunder. However, You may include an additional document offering the additional rights described in Section 3.5. +3.2. Availability of Source Code. 
+Any Modification which You create or to which You contribute must be made available in Source Code form under the terms of this License either on the same media as an Executable version or via an accepted Electronic Distribution Mechanism to anyone to whom you made an Executable version available; and if made available via Electronic Distribution Mechanism, must remain available for at least twelve (12) months after the date it initially became available, or at least six (6) months after a subsequent version of that particular Modification has been made available to such recipients. You are responsible for ensuring that the Source Code version remains available even if the Electronic Distribution Mechanism is maintained by a third party. + +3.3. Description of Modifications. +You must cause all Covered Code to which you contribute to contain a file documenting the changes You made to create that Covered Code and the date of any change. You must include a prominent statement that the Modification is derived, directly or indirectly, from Original Code provided by the Initial Developer and including the name of the Initial Developer in (a) the Source Code, and (b) in any notice in an Executable version or related documentation in which You describe the origin or ownership of the Covered Code. + +3.4. Intellectual Property Matters + +(a) Third Party Claims. +If You have knowledge that a party claims an intellectual property right in particular functionality or code (or its utilization under this License), you must include a text file with the source code distribution titled ``LEGAL'' which describes the claim and the party making the claim in sufficient detail that a recipient will know whom to contact. 
If you obtain such knowledge after You make Your Modification available as described in Section 3.2, You shall promptly modify the LEGAL file in all copies You make available thereafter and shall take other steps (such as notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who received the Covered Code that new knowledge has been obtained. + +(b) Contributor APIs. +If Your Modification is an application programming interface and You own or control patents which are reasonably necessary to implement that API, you must also include this information in the LEGAL file. + +3.5. Required Notices. +You must duplicate the notice in Exhibit A in each file of the Source Code, and this License in any documentation for the Source Code, where You describe recipients' rights relating to Covered Code. If You created one or more Modification(s), You may add your name as a Contributor to the notice described in Exhibit A. If it is not possible to put such notice in a particular Source Code file due to its structure, then you must include such notice in a location (such as a relevant directory file) where a user would be likely to look for such a notice. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Code. However, You may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear than any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. + +3.6. Distribution of Executable Versions. 
+You may distribute Covered Code in Executable form only if the requirements of Section 3.1-3.5 have been met for that Covered Code, and if You include a notice stating that the Source Code version of the Covered Code is available under the terms of this License, including a description of how and where You have fulfilled the obligations of Section 3.2. The notice must be conspicuously included in any notice in an Executable version, related documentation or collateral in which You describe recipients' rights relating to the Covered Code. You may distribute the Executable version of Covered Code under a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable version does not attempt to limit or alter the recipient's rights in the Source Code version from the rights set forth in this License. If You distribute the Executable version under a different license You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or any Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. + +3.7. Larger Works. +You may create a Larger Work by combining Covered Code with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. +If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Code due to statute or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. 
Such description must be included in the LEGAL file described in Section 3.4 and must be included with all distributions of the Source Code. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. + +5. Application of this License. +This License applies to code to which the Initial Developer has attached the notice in Exhibit A, and to related Covered Code. +6. Versions of the License. +6.1. New Versions. +Netscape Communications Corporation (``Netscape'') may publish revised and/or new versions of the License from time to time. Each version will be given a distinguishing version number. +6.2. Effect of New Versions. +Once Covered Code has been published under a particular version of the License, You may always continue to use it under the terms of that version. You may also choose to use such Covered Code under the terms of any subsequent version of the License published by Netscape. No one other than Netscape has the right to modify the terms applicable to Covered Code created under this License. + +6.3. Derivative Works. +If you create or use a modified version of this License (which you may only do in order to apply it to code which is not already Covered Code governed by this License), you must (a) rename Your license so that the phrases ``Mozilla'', ``MOZILLAPL'', ``MOZPL'', ``Netscape'', ``NPL'' or any confusingly similar phrase do not appear anywhere in your license and (b) otherwise make it clear that your version of the license contains terms which differ from the Mozilla Public License and Netscape Public License. (Filling in the name of the Initial Developer, Original Code or Contributor in the notice described in Exhibit A shall not of themselves be deemed to be modifications of this License.) + +7. DISCLAIMER OF WARRANTY. 
+COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN ``AS IS'' BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. +8. TERMINATION. +This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. All sublicenses to the Covered Code which are properly granted shall survive any termination of this License. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. +9. LIMITATION OF LIABILITY. +UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THAT EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. +10. U.S. GOVERNMENT END USERS. +The Covered Code is a ``commercial item,'' as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of ``commercial computer software'' and ``commercial computer software documentation,'' as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Code with only those rights set forth herein. +11. MISCELLANEOUS. +This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by California law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. With respect to disputes in which at least one party is a citizen of, or an entity chartered or registered to do business in, the United States of America: (a) unless otherwise agreed in writing, all disputes relating to this License (excepting any dispute relating to intellectual property rights) shall be subject to final and binding arbitration, with the losing party paying all costs of arbitration; (b) any arbitration relating to this Agreement shall be held in Santa Clara County, California, under the auspices of JAMS/EndDispute; and (c) any litigation relating to this Agreement shall be subject to the jurisdiction of the Federal Courts of the Northern District of California, with venue lying in Santa Clara County, California, with the losing party responsible for costs, including without limitation, court costs and reasonable attorneys fees and expenses. 
The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. +12. RESPONSIBILITY FOR CLAIMS. +Except in cases where another Contributor has failed to comply with Section 3.4, You are responsible for damages arising, directly or indirectly, out of Your utilization of rights under this License, based on the number of copies of Covered Code you made available, the revenues you received from utilizing such rights, and other relevant factors. You agree to work with affected parties to distribute responsibility on an equitable basis. +EXHIBIT A. +``The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.mozilla.org/MPL/ +Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. + +The Original Code is ______________________________________. + +The Initial Developer of the Original Code is ________________________. Portions created by ______________________ are Copyright (C) ______ _______________________. All Rights Reserved. 
+ +Contributor(s): ______________________________________.'' + diff --git a/solr/licenses/aopalliance-1.0.jar.sha1 b/solr/licenses/aopalliance-1.0.jar.sha1 new file mode 100644 index 00000000000..5da3c21c7f4 --- /dev/null +++ b/solr/licenses/aopalliance-1.0.jar.sha1 @@ -0,0 +1 @@ +0235ba8b489512805ac13a8f9ea77a1ca5ebe3e8 diff --git a/solr/licenses/aopalliance-LICENSE-PD.txt b/solr/licenses/aopalliance-LICENSE-PD.txt new file mode 100644 index 00000000000..c75d4e6d9aa --- /dev/null +++ b/solr/licenses/aopalliance-LICENSE-PD.txt @@ -0,0 +1 @@ +Released to Public Domain \ No newline at end of file diff --git a/solr/licenses/argparse4j-0.4.0.jar.sha1 b/solr/licenses/argparse4j-0.4.0.jar.sha1 new file mode 100644 index 00000000000..142c614e02f --- /dev/null +++ b/solr/licenses/argparse4j-0.4.0.jar.sha1 @@ -0,0 +1 @@ +d6ec4128ff0a3ef64f992f1d489b2b4179c8ba81 diff --git a/solr/licenses/argparse4j-LICENSE-MIT.txt b/solr/licenses/argparse4j-LICENSE-MIT.txt new file mode 100644 index 00000000000..42612cfa84a --- /dev/null +++ b/solr/licenses/argparse4j-LICENSE-MIT.txt @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2011, 2013 Tatsuhiro Tsujikawa + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ \ No newline at end of file diff --git a/solr/licenses/asm-3.1.jar.sha1 b/solr/licenses/asm-3.1.jar.sha1 new file mode 100644 index 00000000000..f746051b226 --- /dev/null +++ b/solr/licenses/asm-3.1.jar.sha1 @@ -0,0 +1 @@ +c157def142714c544bdea2e6144645702adf7097 diff --git a/solr/licenses/asm-LICENSE-BSD.txt b/solr/licenses/asm-LICENSE-BSD.txt new file mode 100644 index 00000000000..dda31be0099 --- /dev/null +++ b/solr/licenses/asm-LICENSE-BSD.txt @@ -0,0 +1,29 @@ +/*** + * ASM: a very small and fast Java bytecode manipulation framework + * Copyright (c) 2000-2007 INRIA, France Telecom + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/aspectjrt-1.6.11.jar.sha1 b/solr/licenses/aspectjrt-1.6.11.jar.sha1 new file mode 100644 index 00000000000..8ee4319d892 --- /dev/null +++ b/solr/licenses/aspectjrt-1.6.11.jar.sha1 @@ -0,0 +1 @@ +70afce58891e5f0566a968288c93120b977e3bd0 diff --git a/solr/licenses/aspectjrt-LICENSE-EPL.txt b/solr/licenses/aspectjrt-LICENSE-EPL.txt new file mode 100644 index 00000000000..c93934f3940 --- /dev/null +++ b/solr/licenses/aspectjrt-LICENSE-EPL.txt @@ -0,0 +1,71 @@ +Eclipse Public License - v 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and +b) in the case of each subsequent Contributor: +i) changes to the Program, and +ii) additions to the Program; +where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. 
Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. +"Contributor" means any person or entity that distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, including all Contributors. + +2. GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form. +b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. 
+c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. +d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that: + +a) it complies with the terms and conditions of this Agreement; and +b) its license agreement: +i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; +ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; +iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and +iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for 
software exchange. +When the Program is made available in source code form: + +a) it must be made available under this Agreement; and +b) a copy of this Agreement must be included with each copy of the Program. +Contributors may not remove or alter any copyright notices contained within the Program. + +Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense. 
+ +For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. 
The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation. + diff --git a/solr/licenses/avro-1.7.4.jar.sha1 b/solr/licenses/avro-1.7.4.jar.sha1 new file mode 100644 index 00000000000..d2d38cd1d91 --- /dev/null +++ b/solr/licenses/avro-1.7.4.jar.sha1 @@ -0,0 +1 @@ +416e7030879814f52845b97f04bb50ecd1cef372 diff --git a/solr/licenses/avro-LICENSE-ASL.txt b/solr/licenses/avro-LICENSE-ASL.txt new file mode 100644 index 00000000000..2f23f979d2d --- /dev/null +++ b/solr/licenses/avro-LICENSE-ASL.txt @@ -0,0 +1,308 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---------------------------------------------------------------------- +License for the Jansson C JSON parser used in the C implementation: + +Copyright (c) 2009 Petri Lehtinen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +---------------------------------------------------------------------- +License for the Json.NET used in the C# implementation: + +Copyright (c) 2007 James Newton-King + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +---------------------------------------------------------------------- +License for msinttypes used in the C implementation: +Source from: +http://code.google.com/p/msinttypes/downloads/detail?name=msinttypes-r26.zip + +Copyright (c) 2006-2008 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +---------------------------------------------------------------------- +License for Dirent API for Microsoft Visual Studio used in the C implementation: +Source from: +http://www.softagalleria.net/download/dirent/dirent-1.11.zip + +Copyright (C) 2006 Toni Ronkko + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL TONI RONKKO BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +---------------------------------------------------------------------- \ No newline at end of file diff --git a/solr/licenses/avro-NOTICE.txt b/solr/licenses/avro-NOTICE.txt new file mode 100644 index 00000000000..da479fec1be --- /dev/null +++ b/solr/licenses/avro-NOTICE.txt @@ -0,0 +1,9 @@ +Apache Avro +Copyright 2010 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +C JSON parsing provided by Jansson and +written by Petri Lehtinen. The original software is +available from http://www.digip.org/jansson/. 
\ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-avro-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-avro-0.8.1.jar.sha1 new file mode 100644 index 00000000000..d18d6607c4f --- /dev/null +++ b/solr/licenses/cdk-morphlines-avro-0.8.1.jar.sha1 @@ -0,0 +1 @@ +571c226f5ac71ce5fd23ae9aafb363eb4c58481f diff --git a/solr/licenses/cdk-morphlines-avro-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-avro-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-avro-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-avro-NOTICE.txt b/solr/licenses/cdk-morphlines-avro-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-avro-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). 
\ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-core-0.8.1-tests.jar.sha1 b/solr/licenses/cdk-morphlines-core-0.8.1-tests.jar.sha1 new file mode 100644 index 00000000000..9018483ca64 --- /dev/null +++ b/solr/licenses/cdk-morphlines-core-0.8.1-tests.jar.sha1 @@ -0,0 +1 @@ +93b395579f13f4387c90d370f5f6fdb054070a3e diff --git a/solr/licenses/cdk-morphlines-core-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-core-0.8.1.jar.sha1 new file mode 100644 index 00000000000..ea6f85d696c --- /dev/null +++ b/solr/licenses/cdk-morphlines-core-0.8.1.jar.sha1 @@ -0,0 +1 @@ +c0b87a3c7377db58e328dfecb850323d99655775 diff --git a/solr/licenses/cdk-morphlines-core-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-core-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-core-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-core-NOTICE.txt b/solr/licenses/cdk-morphlines-core-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-core-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). \ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-hadoop-sequencefile-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-hadoop-sequencefile-0.8.1.jar.sha1 new file mode 100644 index 00000000000..5efdde56d53 --- /dev/null +++ b/solr/licenses/cdk-morphlines-hadoop-sequencefile-0.8.1.jar.sha1 @@ -0,0 +1 @@ +c50c9ff93fcf4007dba04edb3f07872849b5aebb diff --git a/solr/licenses/cdk-morphlines-hadoop-sequencefile-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-hadoop-sequencefile-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-hadoop-sequencefile-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-hadoop-sequencefile-NOTICE.txt b/solr/licenses/cdk-morphlines-hadoop-sequencefile-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-hadoop-sequencefile-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). 
\ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-json-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-json-0.8.1.jar.sha1 new file mode 100644 index 00000000000..6c50b027662 --- /dev/null +++ b/solr/licenses/cdk-morphlines-json-0.8.1.jar.sha1 @@ -0,0 +1 @@ +494a6d3bcbba7abc8fddf25ac511e51c7c5a2ff9 diff --git a/solr/licenses/cdk-morphlines-json-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-json-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-json-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-json-NOTICE.txt b/solr/licenses/cdk-morphlines-json-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-json-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). \ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-saxon-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-saxon-0.8.1.jar.sha1 new file mode 100644 index 00000000000..486d6a6c692 --- /dev/null +++ b/solr/licenses/cdk-morphlines-saxon-0.8.1.jar.sha1 @@ -0,0 +1 @@ +9805d7f1cf0240869c02832651fc4518bac77cf9 diff --git a/solr/licenses/cdk-morphlines-saxon-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-saxon-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-saxon-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-saxon-NOTICE.txt b/solr/licenses/cdk-morphlines-saxon-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-saxon-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). 
\ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-tika-core-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-tika-core-0.8.1.jar.sha1 new file mode 100644 index 00000000000..312593060bd --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-core-0.8.1.jar.sha1 @@ -0,0 +1 @@ +96c3497423c8fb2c2fd75f836960e40b7e69454d diff --git a/solr/licenses/cdk-morphlines-tika-core-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-tika-core-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-core-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-tika-core-NOTICE.txt b/solr/licenses/cdk-morphlines-tika-core-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-core-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). \ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-tika-decompress-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-tika-decompress-0.8.1.jar.sha1 new file mode 100644 index 00000000000..36e3c3f6628 --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-decompress-0.8.1.jar.sha1 @@ -0,0 +1 @@ +060ccc1dda318af6826feca736c6c25ef9c8f207 diff --git a/solr/licenses/cdk-morphlines-tika-decompress-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-tika-decompress-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-decompress-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-tika-decompress-NOTICE.txt b/solr/licenses/cdk-morphlines-tika-decompress-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-tika-decompress-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). 
\ No newline at end of file diff --git a/solr/licenses/cdk-morphlines-twitter-0.8.1.jar.sha1 b/solr/licenses/cdk-morphlines-twitter-0.8.1.jar.sha1 new file mode 100644 index 00000000000..8368d3d6ab5 --- /dev/null +++ b/solr/licenses/cdk-morphlines-twitter-0.8.1.jar.sha1 @@ -0,0 +1 @@ +d950d1f83ff52f4c8d948962c57bd60252c5e773 diff --git a/solr/licenses/cdk-morphlines-twitter-LICENSE-ASL.txt b/solr/licenses/cdk-morphlines-twitter-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/cdk-morphlines-twitter-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/cdk-morphlines-twitter-NOTICE.txt b/solr/licenses/cdk-morphlines-twitter-NOTICE.txt new file mode 100644 index 00000000000..e66f59741cd --- /dev/null +++ b/solr/licenses/cdk-morphlines-twitter-NOTICE.txt @@ -0,0 +1,8 @@ +This product includes software developed by Cloudera, Inc. +(http://www.cloudera.com/). + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software developed by +Saxonica (http://www.saxonica.com/). \ No newline at end of file diff --git a/solr/licenses/config-1.0.2.jar.sha1 b/solr/licenses/config-1.0.2.jar.sha1 new file mode 100644 index 00000000000..6dbf80d7f94 --- /dev/null +++ b/solr/licenses/config-1.0.2.jar.sha1 @@ -0,0 +1 @@ +a0bca82c39f23f75e3afccd6e12840eeabaea123 diff --git a/solr/licenses/config-LICENSE-ASL.txt b/solr/licenses/config-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/config-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/config-NOTICE.txt b/solr/licenses/config-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/guice-3.0.jar.sha1 b/solr/licenses/guice-3.0.jar.sha1 new file mode 100644 index 00000000000..7ce1a30309c --- /dev/null +++ b/solr/licenses/guice-3.0.jar.sha1 @@ -0,0 +1 @@ +9d84f15fe35e2c716a02979fb62f50a29f38aefa diff --git a/solr/licenses/guice-LICENSE-ASL.txt b/solr/licenses/guice-LICENSE-ASL.txt new file mode 100644 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/solr/licenses/guice-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/solr/licenses/guice-NOTICE.txt b/solr/licenses/guice-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/guice-servlet-3.0.jar.sha1 b/solr/licenses/guice-servlet-3.0.jar.sha1 new file mode 100644 index 00000000000..a496feb6904 --- /dev/null +++ b/solr/licenses/guice-servlet-3.0.jar.sha1 @@ -0,0 +1 @@ +610cde0e8da5a8b7d8efb8f0b8987466ffebaaf9 diff --git a/solr/licenses/guice-servlet-LICENSE-ASL.txt b/solr/licenses/guice-servlet-LICENSE-ASL.txt new file mode 100644 index 00000000000..7a4a3ea2424 --- /dev/null +++ b/solr/licenses/guice-servlet-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/solr/licenses/guice-servlet-NOTICE.txt b/solr/licenses/guice-servlet-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/hadoop-mapreduce-client-app-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-app-2.2.0.jar.sha1 new file mode 100644 index 00000000000..32c3a59dcc7 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-app-2.2.0.jar.sha1 @@ -0,0 +1 @@ +9e5bdd970000b330382128350a957609cbcfe348 diff --git a/solr/licenses/hadoop-mapreduce-client-app-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-app-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-app-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-app-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-app-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-app-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-common-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-common-2.2.0.jar.sha1 new file mode 100644 index 00000000000..87cb25e47b1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-common-2.2.0.jar.sha1 @@ -0,0 +1 @@ +5600fdda58499e3901bf179f1614a8ca38090871 diff --git a/solr/licenses/hadoop-mapreduce-client-common-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-common-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-common-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-common-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-common-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-common-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-core-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-core-2.2.0.jar.sha1 new file mode 100644 index 00000000000..ead6387a193 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-core-2.2.0.jar.sha1 @@ -0,0 +1 @@ +4be274d45f35543d3c4dd8e2bfed2cebc56696c7 diff --git a/solr/licenses/hadoop-mapreduce-client-core-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-core-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-core-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-core-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-core-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-core-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-hs-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-hs-2.2.0.jar.sha1 new file mode 100644 index 00000000000..455d9cdee85 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-hs-2.2.0.jar.sha1 @@ -0,0 +1 @@ +7c3b62138f881f1a98f02347b1002b9bde052b81 diff --git a/solr/licenses/hadoop-mapreduce-client-hs-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-hs-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-hs-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-hs-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-hs-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-hs-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar.sha1 new file mode 100644 index 00000000000..67376200fee --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar.sha1 @@ -0,0 +1 @@ +4c75b683a7d96a48172535c115b2067faf211cfc diff --git a/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0.jar.sha1 new file mode 100644 index 00000000000..8f63967662f --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-jobclient-2.2.0.jar.sha1 @@ -0,0 +1 @@ +842d0c9d8793fd21bfbb1c6b1fa9fbc05698f76c diff --git a/solr/licenses/hadoop-mapreduce-client-jobclient-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-jobclient-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-jobclient-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-jobclient-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-jobclient-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-jobclient-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-shuffle-2.2.0.jar.sha1 b/solr/licenses/hadoop-mapreduce-client-shuffle-2.2.0.jar.sha1 new file mode 100644 index 00000000000..1845c5437bf --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-shuffle-2.2.0.jar.sha1 @@ -0,0 +1 @@ +c4c9da8f8f6ab1e3ba68798f30360eff4ba52187 diff --git a/solr/licenses/hadoop-mapreduce-client-shuffle-LICENSE-ASL.txt b/solr/licenses/hadoop-mapreduce-client-shuffle-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-shuffle-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-mapreduce-client-shuffle-NOTICE.txt b/solr/licenses/hadoop-mapreduce-client-shuffle-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-mapreduce-client-shuffle-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-api-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-api-2.2.0.jar.sha1 new file mode 100644 index 00000000000..c81f37c98eb --- /dev/null +++ b/solr/licenses/hadoop-yarn-api-2.2.0.jar.sha1 @@ -0,0 +1 @@ +655910becbe9c5c60033e9e64e95aab0ec4ce94a diff --git a/solr/licenses/hadoop-yarn-api-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-api-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-api-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-api-NOTICE.txt b/solr/licenses/hadoop-yarn-api-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-api-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-client-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-client-2.2.0.jar.sha1 new file mode 100644 index 00000000000..ed769373d3f --- /dev/null +++ b/solr/licenses/hadoop-yarn-client-2.2.0.jar.sha1 @@ -0,0 +1 @@ +f299044dd9e546ca30a30014ef30699306e9ef3e diff --git a/solr/licenses/hadoop-yarn-client-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-client-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-client-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-client-NOTICE.txt b/solr/licenses/hadoop-yarn-client-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-client-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-common-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-common-2.2.0.jar.sha1 new file mode 100644 index 00000000000..05d1a40d409 --- /dev/null +++ b/solr/licenses/hadoop-yarn-common-2.2.0.jar.sha1 @@ -0,0 +1 @@ +77f18c3d40dcb45b0be2602cfa5115a5edb40db1 diff --git a/solr/licenses/hadoop-yarn-common-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-common-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-common-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-common-NOTICE.txt b/solr/licenses/hadoop-yarn-common-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-common-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-common-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-server-common-2.2.0.jar.sha1 new file mode 100644 index 00000000000..ad9a65e2451 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-common-2.2.0.jar.sha1 @@ -0,0 +1 @@ +ce13e5699bbe644da95bfd7e01549c6a389fec7f diff --git a/solr/licenses/hadoop-yarn-server-common-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-server-common-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-common-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-common-NOTICE.txt b/solr/licenses/hadoop-yarn-server-common-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-common-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-nodemanager-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-server-nodemanager-2.2.0.jar.sha1 new file mode 100644 index 00000000000..52551013e9c --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-nodemanager-2.2.0.jar.sha1 @@ -0,0 +1 @@ +5e7f0f16676afffff62919578bcb5606e3548f36 diff --git a/solr/licenses/hadoop-yarn-server-nodemanager-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-server-nodemanager-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-nodemanager-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-nodemanager-NOTICE.txt b/solr/licenses/hadoop-yarn-server-nodemanager-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-nodemanager-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-resourcemanager-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-server-resourcemanager-2.2.0.jar.sha1 new file mode 100644 index 00000000000..57843e0d07b --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-resourcemanager-2.2.0.jar.sha1 @@ -0,0 +1 @@ +867da9c1c98a2c8c9b6cf7f3f9354818cd8831cf diff --git a/solr/licenses/hadoop-yarn-server-resourcemanager-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-server-resourcemanager-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-resourcemanager-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-resourcemanager-NOTICE.txt b/solr/licenses/hadoop-yarn-server-resourcemanager-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-resourcemanager-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-tests-2.2.0-tests.jar.sha1 b/solr/licenses/hadoop-yarn-server-tests-2.2.0-tests.jar.sha1 new file mode 100644 index 00000000000..dafd029f029 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-tests-2.2.0-tests.jar.sha1 @@ -0,0 +1 @@ +d6bf9776d45f3812a9011d768d571bc554706f05 diff --git a/solr/licenses/hadoop-yarn-server-tests-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-server-tests-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-tests-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-tests-NOTICE.txt b/solr/licenses/hadoop-yarn-server-tests-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-tests-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-web-proxy-2.2.0.jar.sha1 b/solr/licenses/hadoop-yarn-server-web-proxy-2.2.0.jar.sha1 new file mode 100644 index 00000000000..23494168c8c --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-web-proxy-2.2.0.jar.sha1 @@ -0,0 +1 @@ +ab2404e576910f14cbcd185f81776ff806571b37 diff --git a/solr/licenses/hadoop-yarn-server-web-proxy-LICENSE-ASL.txt b/solr/licenses/hadoop-yarn-server-web-proxy-LICENSE-ASL.txt new file mode 100644 index 00000000000..9a8e847ee84 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-web-proxy-LICENSE-ASL.txt @@ -0,0 +1,244 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HADOOP SUBCOMPONENTS: + +The Apache Hadoop project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for these +subcomponents is subject to the terms and conditions of the following +licenses. + +For the org.apache.hadoop.util.bloom.* classes: + +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract + * 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ \ No newline at end of file diff --git a/solr/licenses/hadoop-yarn-server-web-proxy-NOTICE.txt b/solr/licenses/hadoop-yarn-server-web-proxy-NOTICE.txt new file mode 100644 index 00000000000..c56a5e4eac1 --- /dev/null +++ b/solr/licenses/hadoop-yarn-server-web-proxy-NOTICE.txt @@ -0,0 +1,2 @@ +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/solr/licenses/jackson-annotations-2.2.3.jar.sha1 b/solr/licenses/jackson-annotations-2.2.3.jar.sha1 new file mode 100644 index 00000000000..bd1ab32a61c --- /dev/null +++ b/solr/licenses/jackson-annotations-2.2.3.jar.sha1 @@ -0,0 +1 @@ +0527fece4f23a457070a36c371a26d6c0208e1c3 diff --git a/solr/licenses/jackson-annotations-LICENSE-ASL.txt b/solr/licenses/jackson-annotations-LICENSE-ASL.txt new file mode 100644 index 00000000000..cebe8c83b06 --- /dev/null +++ b/solr/licenses/jackson-annotations-LICENSE-ASL.txt @@ -0,0 +1,8 @@ +This copy of Jackson JSON processor annotations is licensed under the +Apache (Software) License, version 2.0 ("the License"). 
+See the License for details about distribution rights, and the +specific rights regarding derivative works. + +You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file diff --git a/solr/licenses/jackson-annotations-NOTICE.txt b/solr/licenses/jackson-annotations-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/jackson-core-2.2.3.jar.sha1 b/solr/licenses/jackson-core-2.2.3.jar.sha1 new file mode 100644 index 00000000000..a3463fa5a65 --- /dev/null +++ b/solr/licenses/jackson-core-2.2.3.jar.sha1 @@ -0,0 +1 @@ +1a0113da2cab5f4c216b4e5e7c1dbfaa67087e14 diff --git a/solr/licenses/jackson-core-LICENSE-ASL.txt b/solr/licenses/jackson-core-LICENSE-ASL.txt new file mode 100644 index 00000000000..aa15a313ac3 --- /dev/null +++ b/solr/licenses/jackson-core-LICENSE-ASL.txt @@ -0,0 +1,8 @@ +This copy of Jackson JSON processor streaming parser/generator is licensed under the +Apache (Software) License, version 2.0 ("the License"). +See the License for details about distribution rights, and the +specific rights regarding derivative works. + +You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file diff --git a/solr/licenses/jackson-core-NOTICE.txt b/solr/licenses/jackson-core-NOTICE.txt new file mode 100644 index 00000000000..deee84d3747 --- /dev/null +++ b/solr/licenses/jackson-core-NOTICE.txt @@ -0,0 +1,20 @@ +# Jackson JSON processor + +Jackson is a high-performance, Free/Open Source JSON processing library. +It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +been in development since 2007. +It is currently developed by a community of developers, as well as supported +commercially by FasterXML.com. + +## Licensing + +Jackson core and extension components may be licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file.
+For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +## Credits + +A list of contributors may be found from CREDITS file, which is included +in some artifacts (usually source distributions); but is always available +from the source code management (SCM) system project uses. \ No newline at end of file diff --git a/solr/licenses/jackson-core-asl-1.9.13.jar.sha1 b/solr/licenses/jackson-core-asl-1.9.13.jar.sha1 new file mode 100644 index 00000000000..80a8b891a3c --- /dev/null +++ b/solr/licenses/jackson-core-asl-1.9.13.jar.sha1 @@ -0,0 +1 @@ +3c304d70f42f832e0a86d45bd437f692129299a4 diff --git a/solr/licenses/jackson-databind-2.2.3.jar.sha1 b/solr/licenses/jackson-databind-2.2.3.jar.sha1 new file mode 100644 index 00000000000..c554c3215da --- /dev/null +++ b/solr/licenses/jackson-databind-2.2.3.jar.sha1 @@ -0,0 +1 @@ +03ae380888029daefb91d3ecdca3a37d8cb92bc9 diff --git a/solr/licenses/jackson-databind-LICENSE-ASL.txt b/solr/licenses/jackson-databind-LICENSE-ASL.txt new file mode 100644 index 00000000000..97c8034a608 --- /dev/null +++ b/solr/licenses/jackson-databind-LICENSE-ASL.txt @@ -0,0 +1,8 @@ +This copy of Jackson JSON processor databind module is licensed under the +Apache (Software) License, version 2.0 ("the License"). +See the License for details about distribution rights, and the +specific rights regarding derivative works. + +You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file diff --git a/solr/licenses/jackson-databind-NOTICE.txt b/solr/licenses/jackson-databind-NOTICE.txt new file mode 100644 index 00000000000..8c716829153 --- /dev/null +++ b/solr/licenses/jackson-databind-NOTICE.txt @@ -0,0 +1,20 @@ +# Jackson JSON processor + +Jackson is a high-performance, Free/Open Source JSON processing library. +It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +been in development since 2007.
+It is currently developed by a community of developers, as well as supported +commercially by FasterXML.com. + +## Licensing + +Jackson core and extension components may be licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +## Credits + +A list of contributors may be found from CREDITS file, which is included +in some artifacts (usually source distributions); but is always available +from the source code management (SCM) system project uses. \ No newline at end of file diff --git a/solr/licenses/jackson-jaxrs-1.9.13.jar.sha1 b/solr/licenses/jackson-jaxrs-1.9.13.jar.sha1 new file mode 100644 index 00000000000..3bffb8f68d0 --- /dev/null +++ b/solr/licenses/jackson-jaxrs-1.9.13.jar.sha1 @@ -0,0 +1 @@ +534d72d2b9d6199dd531dfb27083dd4844082bba diff --git a/solr/licenses/jackson-jaxrs-LICENSE-ASL.txt b/solr/licenses/jackson-jaxrs-LICENSE-ASL.txt new file mode 100644 index 00000000000..49ac3a7d45f --- /dev/null +++ b/solr/licenses/jackson-jaxrs-LICENSE-ASL.txt @@ -0,0 +1,13 @@ +This copy of Jackson JSON processor is licensed under the +Apache (Software) License, version 2.0 ("the License"). +See the License for details about distribution rights, and the +specific rights regarding derivative works. + +You may obtain a copy of the License at: + +http://www.apache.org/licenses/ + +A copy is also included with both the downloadable source code package +and jar that contains class bytecodes, as file "ASL 2.0".
In both cases, +that file should be located next to this file: in source distribution +the location should be "release-notes/asl"; and in jar "META-INF/" \ No newline at end of file diff --git a/solr/licenses/jackson-jaxrs-NOTICE.txt b/solr/licenses/jackson-jaxrs-NOTICE.txt new file mode 100644 index 00000000000..e9ebcc69fde --- /dev/null +++ b/solr/licenses/jackson-jaxrs-NOTICE.txt @@ -0,0 +1,7 @@ +This product currently only contains code developed by authors +of specific components, as identified by the source code files; +if such notes are missing files have been created by +Tatu Saloranta. + +For additional credits (generally to people who reported problems) +see CREDITS file. \ No newline at end of file diff --git a/solr/licenses/jackson-mapper-asl-1.9.13.jar.sha1 b/solr/licenses/jackson-mapper-asl-1.9.13.jar.sha1 new file mode 100644 index 00000000000..972b214d969 --- /dev/null +++ b/solr/licenses/jackson-mapper-asl-1.9.13.jar.sha1 @@ -0,0 +1 @@ +1ee2f2bed0e5dd29d1cb155a166e6f8d50bbddb7 diff --git a/solr/licenses/javax.inject-1.jar.sha1 b/solr/licenses/javax.inject-1.jar.sha1 new file mode 100644 index 00000000000..7ef3c707b3c --- /dev/null +++ b/solr/licenses/javax.inject-1.jar.sha1 @@ -0,0 +1 @@ +6975da39a7040257bd51d21a231b76c915872d38 diff --git a/solr/licenses/javax.inject-LICENSE-ASL.txt b/solr/licenses/javax.inject-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/javax.inject-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/javax.inject-NOTICE.txt b/solr/licenses/javax.inject-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/jaxb-impl-2.2.2.jar.sha1 b/solr/licenses/jaxb-impl-2.2.2.jar.sha1 new file mode 100644 index 00000000000..1b31975a311 --- /dev/null +++ b/solr/licenses/jaxb-impl-2.2.2.jar.sha1 @@ -0,0 +1 @@ +5b206d63c546fd4a8fa53c3b4a96345ad80fc45a diff --git a/solr/licenses/jaxb-impl-LICENSE-CDDL.txt b/solr/licenses/jaxb-impl-LICENSE-CDDL.txt new file mode 100644 index 00000000000..a0ccc93564c --- /dev/null +++ b/solr/licenses/jaxb-impl-LICENSE-CDDL.txt @@ -0,0 +1,263 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 + +1. Definitions. + + 1.1. "Contributor"
means each individual or entity that creates or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other than Source Code. + + 1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means the Source Code and Executable form of any of the following: + + A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; + + B. Any new file that contains any part of the Original Software or previous Modification; or + + C. Any new file that is contributed or otherwise made available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. + + 1.12. "Source Code"
means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: + + (a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices.
+ + 2.2. Contributor Grant. + + Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: + + (a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. + + (d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. 
+ Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. + + 3.2. Modifications. + The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. + + 3.3. Required Notices. + You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. + + 3.4. Application of Additional Terms. + You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor.
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. + + 3.5. Distribution of Executable Versions. + You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. + + 3.6. Larger Works. + You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + Sun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. + + 4.2. Effect of New Versions.
+ You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. + + 4.3. Modified Versions. + When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + + 6.1.
This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. + + 6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY.
+ + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable.
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + + NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) + + The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California.
+ + +The GNU General Public License (GPL) Version 2, June 1991 + + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
+ +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification follow. + + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. + + c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + +3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + + One line to give the program's name and a brief idea of what it does. + + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. + + signature of Ty Coon, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. 
+ + +"CLASSPATH" EXCEPTION TO THE GPL VERSION 2 + +Certain source files distributed by Sun Microsystems, Inc. are subject to the following clarification and special exception to the GPL Version 2, but only where Sun has expressly included in the particular source file's header the words + +"Sun designates this particular file as subject to the "Classpath" exception as provided by Sun in the License file that accompanied this code." + +Linking this library statically or dynamically with other modules is making a combined work based on this library. Thus, the terms and conditions of the GNU General Public License Version 2 cover the whole combination. + +As a special exception, the copyright holders of this library give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you may extend this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. diff --git a/solr/licenses/jersey-bundle-1.8.jar.sha1 b/solr/licenses/jersey-bundle-1.8.jar.sha1 new file mode 100644 index 00000000000..4ff2c766383 --- /dev/null +++ b/solr/licenses/jersey-bundle-1.8.jar.sha1 @@ -0,0 +1 @@ +b59d9d4dd6d6301515697b29260f1f4dcaabd771 diff --git a/solr/licenses/jersey-bundle-LICENSE-CDDL.txt b/solr/licenses/jersey-bundle-LICENSE-CDDL.txt new file mode 100644 index 00000000000..64df8d56300 --- /dev/null +++ b/solr/licenses/jersey-bundle-LICENSE-CDDL.txt @@ -0,0 +1,85 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)Version 1.1 + +1. Definitions. + +1.1. 
“Contributor” means each individual or entity that creates or contributes to the creation of Modifications. +1.2. “Contributor Version” means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. +1.3. “Covered Software” means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. +1.4. “Executable” means the Covered Software in any form other than Source Code. +1.5. “Initial Developer” means the individual or entity that first makes Original Software available under this License. +1.6. “Larger Work” means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. +1.7. “License” means this document. +1.8. “Licensable” means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. +1.9. “Modifications” means the Source Code and Executable form of any of the following: +A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; +B. Any new file that contains any part of the Original Software or previous Modification; or +C. Any new file that is contributed or otherwise made available under the terms of this License. +1.10. “Original Software” means the Source Code and Executable form of computer software code that is originally released under this License. +1.11. “Patent Claims” means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. +1.12. 
“Source Code” means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. +1.13. “You” (or “Your”) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, “You” includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. +2. License Grants. + +2.1. The Initial Developer Grant. +Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). +(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. +(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. +2.2. 
Contributor Grant. +Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). +(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. +(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. +3. Distribution Obligations. + +3.1. Availability of Source Code. 
+Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. +3.2. Modifications. +The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. +3.3. Required Notices. +You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. +3.4. Application of Additional Terms. +You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. +3.5. Distribution of Executable Versions. +You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. +3.6. Larger Works. +You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. +4. Versions of the License. + +4.1. New Versions. +Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. +4.2. Effect of New Versions. 
+You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. +4.3. Modified Versions. +When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. +5. DISCLAIMER OF WARRANTY. + +COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN “AS IS” BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + +6.1. 
This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. +6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as “Participant”) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. +6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. +6.4. 
In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. +7. LIMITATION OF LIABILITY. + +UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + +The Covered Software is a “commercial item,” as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of “commercial computer software” (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and “commercial computer software documentation” as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. 
+ +This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + +As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) + +The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). 
Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. \ No newline at end of file diff --git a/solr/licenses/jersey-core-1.8.jar.sha1 b/solr/licenses/jersey-core-1.8.jar.sha1 new file mode 100644 index 00000000000..1a24e1368da --- /dev/null +++ b/solr/licenses/jersey-core-1.8.jar.sha1 @@ -0,0 +1 @@ +b6a0553c0eb3da45a9b8947a0a7283b3b9266d0d diff --git a/solr/licenses/jersey-guice-1.8.jar.sha1 b/solr/licenses/jersey-guice-1.8.jar.sha1 new file mode 100644 index 00000000000..c37b9bf62d9 --- /dev/null +++ b/solr/licenses/jersey-guice-1.8.jar.sha1 @@ -0,0 +1 @@ +f4e7772030608e281bb39ffcc7028c2e430356e7 diff --git a/solr/licenses/jersey-guice-LICENSE-CDDL.txt b/solr/licenses/jersey-guice-LICENSE-CDDL.txt new file mode 100644 index 00000000000..64df8d56300 --- /dev/null +++ b/solr/licenses/jersey-guice-LICENSE-CDDL.txt @@ -0,0 +1,85 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)Version 1.1 + +1. Definitions. + +1.1. “Contributor†means each individual or entity that creates or contributes to the creation of Modifications. +1.2. “Contributor Version†means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. +1.3. “Covered Software†means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. +1.4. “Executable†means the Covered Software in any form other than Source Code. +1.5. “Initial Developer†means the individual or entity that first makes Original Software available under this License. +1.6. “Larger Work†means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. +1.7. 
“License” means this document. +1.8. “Licensable” means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. +1.9. “Modifications” means the Source Code and Executable form of any of the following: +A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; +B. Any new file that contains any part of the Original Software or previous Modification; or +C. Any new file that is contributed or otherwise made available under the terms of this License. +1.10. “Original Software” means the Source Code and Executable form of computer software code that is originally released under this License. +1.11. “Patent Claims” means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. +1.12. “Source Code” means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. +1.13. “You” (or “Your”) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, “You” includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. +2. License Grants. + +2.1. The Initial Developer Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). +(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. +(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. +2.2. Contributor Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). +(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. +(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. +3. Distribution Obligations. + +3.1. Availability of Source Code. +Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. 
You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. +3.2. Modifications. +The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. +3.3. Required Notices. +You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. +3.4. Application of Additional Terms. +You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. +3.5. Distribution of Executable Versions. 
+You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. +3.6. Larger Works. +You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. +4. Versions of the License. + +4.1. New Versions. +Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. +4.2. Effect of New Versions. +You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. 
If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. +4.3. Modified Versions. +When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. +5. DISCLAIMER OF WARRANTY. + +COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN “AS IS” BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + +6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. 
Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. +6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as “Participant”) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. +6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. +6.4. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. +7. LIMITATION OF LIABILITY. 
+ +UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + +The Covered Software is a “commercial item,” as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of “commercial computer software” (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and “commercial computer software documentation” as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. + +This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + +As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) + +The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. 
\ No newline at end of file diff --git a/solr/licenses/jersey-json-1.8.jar.sha1 b/solr/licenses/jersey-json-1.8.jar.sha1 new file mode 100644 index 00000000000..dcd29565da0 --- /dev/null +++ b/solr/licenses/jersey-json-1.8.jar.sha1 @@ -0,0 +1 @@ +825621478fec59983106efaa032c679f925b4eff diff --git a/solr/licenses/jersey-json-LICENSE-CDDL.txt b/solr/licenses/jersey-json-LICENSE-CDDL.txt new file mode 100644 index 00000000000..64df8d56300 --- /dev/null +++ b/solr/licenses/jersey-json-LICENSE-CDDL.txt @@ -0,0 +1,85 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)Version 1.1 + +1. Definitions. + +1.1. “Contributor” means each individual or entity that creates or contributes to the creation of Modifications. +1.2. “Contributor Version” means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. +1.3. “Covered Software” means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. +1.4. “Executable” means the Covered Software in any form other than Source Code. +1.5. “Initial Developer” means the individual or entity that first makes Original Software available under this License. +1.6. “Larger Work” means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. +1.7. “License” means this document. +1.8. “Licensable” means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. +1.9. “Modifications” means the Source Code and Executable form of any of the following: +A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; +B. 
Any new file that contains any part of the Original Software or previous Modification; or +C. Any new file that is contributed or otherwise made available under the terms of this License. +1.10. “Original Software” means the Source Code and Executable form of computer software code that is originally released under this License. +1.11. “Patent Claims” means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. +1.12. “Source Code” means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. +1.13. “You” (or “Your”) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, “You” includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. +2. License Grants. + +2.1. The Initial Developer Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). +(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. +(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. +2.2. Contributor Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). +(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. +(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. +3. Distribution Obligations. + +3.1. Availability of Source Code. +Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. 
You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. +3.2. Modifications. +The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. +3.3. Required Notices. +You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. +3.4. Application of Additional Terms. +You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. +3.5. Distribution of Executable Versions. 
+You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. +3.6. Larger Works. +You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. +4. Versions of the License. + +4.1. New Versions. +Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. +4.2. Effect of New Versions. +You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. 
If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. +4.3. Modified Versions. +When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. +5. DISCLAIMER OF WARRANTY. + +COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN “AS IS” BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + +6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. 
Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. +6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as “Participant”) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. +6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. +6.4. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. +7. LIMITATION OF LIABILITY. 
+ +UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + +The Covered Software is a “commercial item,” as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of “commercial computer software” (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and “commercial computer software documentation” as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. + +This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + +As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) + +The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. 
\ No newline at end of file diff --git a/solr/licenses/jersey-server-1.8.jar.sha1 b/solr/licenses/jersey-server-1.8.jar.sha1 new file mode 100644 index 00000000000..9e885f6d7e9 --- /dev/null +++ b/solr/licenses/jersey-server-1.8.jar.sha1 @@ -0,0 +1 @@ +6da1231f5e2d7a9f7d194e292fc3695ba7710b2f diff --git a/solr/licenses/jersey-server-LICENSE-CDDL.txt b/solr/licenses/jersey-server-LICENSE-CDDL.txt new file mode 100644 index 00000000000..64df8d56300 --- /dev/null +++ b/solr/licenses/jersey-server-LICENSE-CDDL.txt @@ -0,0 +1,85 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)Version 1.1 + +1. Definitions. + +1.1. “Contributor” means each individual or entity that creates or contributes to the creation of Modifications. +1.2. “Contributor Version” means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. +1.3. “Covered Software” means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. +1.4. “Executable” means the Covered Software in any form other than Source Code. +1.5. “Initial Developer” means the individual or entity that first makes Original Software available under this License. +1.6. “Larger Work” means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. +1.7. “License” means this document. +1.8. “Licensable” means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. +1.9. “Modifications” means the Source Code and Executable form of any of the following: +A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; +B. 
Any new file that contains any part of the Original Software or previous Modification; or +C. Any new file that is contributed or otherwise made available under the terms of this License. +1.10. “Original Software” means the Source Code and Executable form of computer software code that is originally released under this License. +1.11. “Patent Claims” means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. +1.12. “Source Code” means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. +1.13. “You” (or “Your”) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, “You” includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. +2. License Grants. + +2.1. The Initial Developer Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). +(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. +(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. +2.2. Contributor Grant. 
+Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: +(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and +(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). +(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. +(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. +3. Distribution Obligations. + +3.1. Availability of Source Code. +Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. 
You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. +3.2. Modifications. +The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. +3.3. Required Notices. +You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. +3.4. Application of Additional Terms. +You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. +3.5. Distribution of Executable Versions. 
+You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. +3.6. Larger Works. +You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. +4. Versions of the License. + +4.1. New Versions. +Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. +4.2. Effect of New Versions. +You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. 
If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. +4.3. Modified Versions. +When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. +5. DISCLAIMER OF WARRANTY. + +COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN “AS IS” BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + +6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. 
Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. +6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as “Participant”) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. +6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. +6.4. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. +7. LIMITATION OF LIABILITY. 
+ +UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + +The Covered Software is a “commercial item,” as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of “commercial computer software” (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and “commercial computer software documentation” as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. + +This License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + +As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) + +The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. 
\ No newline at end of file diff --git a/solr/licenses/metrics-core-3.0.1.jar.sha1 b/solr/licenses/metrics-core-3.0.1.jar.sha1 new file mode 100644 index 00000000000..1d42f50b422 --- /dev/null +++ b/solr/licenses/metrics-core-3.0.1.jar.sha1 @@ -0,0 +1 @@ +1e98427c7f6e53363b598e2943e50903ce4f3657 diff --git a/solr/licenses/metrics-core-LICENSE-ASL.txt b/solr/licenses/metrics-core-LICENSE-ASL.txt new file mode 100644 index 00000000000..e4ba40426da --- /dev/null +++ b/solr/licenses/metrics-core-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2010-2012 Coda Hale and Yammer, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/metrics-core-NOTICE.txt b/solr/licenses/metrics-core-NOTICE.txt new file mode 100644 index 00000000000..4fe83de38a5 --- /dev/null +++ b/solr/licenses/metrics-core-NOTICE.txt @@ -0,0 +1,11 @@ +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. + +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/solr/licenses/metrics-healthchecks-3.0.1.jar.sha1 b/solr/licenses/metrics-healthchecks-3.0.1.jar.sha1 new file mode 100644 index 00000000000..4c1055ff1fe --- /dev/null +++ b/solr/licenses/metrics-healthchecks-3.0.1.jar.sha1 @@ -0,0 +1 @@ +bec37e61ebe40bf0f52f3fc8b7df57b5c1773682 diff --git a/solr/licenses/metrics-healthchecks-LICENSE-ASL.txt b/solr/licenses/metrics-healthchecks-LICENSE-ASL.txt new file mode 100644 index 00000000000..e4ba40426da --- /dev/null +++ b/solr/licenses/metrics-healthchecks-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2010-2012 Coda Hale and Yammer, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/metrics-healthchecks-NOTICE.txt b/solr/licenses/metrics-healthchecks-NOTICE.txt new file mode 100644 index 00000000000..4fe83de38a5 --- /dev/null +++ b/solr/licenses/metrics-healthchecks-NOTICE.txt @@ -0,0 +1,11 @@ +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. 
+ +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/solr/licenses/mockito-core-1.9.5.jar.sha1 b/solr/licenses/mockito-core-1.9.5.jar.sha1 new file mode 100644 index 00000000000..5de9041c834 --- /dev/null +++ b/solr/licenses/mockito-core-1.9.5.jar.sha1 @@ -0,0 +1 @@ +c3264abeea62c4d2f367e21484fbb40c7e256393 diff --git a/solr/licenses/mockito-core-LICENSE-MIT.txt b/solr/licenses/mockito-core-LICENSE-MIT.txt new file mode 100644 index 00000000000..e0840a446ca --- /dev/null +++ b/solr/licenses/mockito-core-LICENSE-MIT.txt @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2007 Mockito contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
\ No newline at end of file diff --git a/solr/licenses/mrunit-1.0.0-hadoop2.jar.sha1 b/solr/licenses/mrunit-1.0.0-hadoop2.jar.sha1 new file mode 100644 index 00000000000..6146ee05008 --- /dev/null +++ b/solr/licenses/mrunit-1.0.0-hadoop2.jar.sha1 @@ -0,0 +1 @@ +d6e4cce578b705508bfd7fd3fafbccc3adb33e83 diff --git a/solr/licenses/mrunit-LICENSE-ASL.txt b/solr/licenses/mrunit-LICENSE-ASL.txt new file mode 100644 index 00000000000..75f307ab0ad --- /dev/null +++ b/solr/licenses/mrunit-LICENSE-ASL.txt @@ -0,0 +1,479 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +========================================================================== +The Apache License, Version 2.0 applies to the following libraries: +commons-logging + +========================================================================== +The following license applies to the junit library +-------------------------------------------------------------------------- + +Common Public License Version 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM +CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + +a) in the case of the initial Contributor, the initial code and +documentation distributed under this Agreement, and + +b) in the case of each subsequent Contributor: + +i) changes to the Program, and + +ii) additions to the Program; + +where such changes and/or additions to the Program originate from and are +distributed by that particular Contributor. A Contribution 'originates' from a +Contributor if it was added to the Program by such Contributor itself or anyone +acting on such Contributor's behalf. Contributions do not include additions to +the Program which: (i) are separate modules of software distributed in +conjunction with the Program under their own license agreement, and (ii) are not +derivative works of the Program. 
+ +"Contributor" means any person or entity that distributes the Program. + +"Licensed Patents " mean patent claims licensable by a Contributor which are +necessarily infringed by the use or sale of its Contribution alone or when +combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. GRANT OF RIGHTS + +a) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free copyright license to +reproduce, prepare derivative works of, publicly display, publicly perform, +distribute and sublicense the Contribution of such Contributor, if any, and such +derivative works, in source code and object code form. + +b) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed +Patents to make, use, sell, offer to sell, import and otherwise transfer the +Contribution of such Contributor, if any, in source code and object code form. +This patent license shall apply to the combination of the Contribution and the +Program if, at the time the Contribution is added by the Contributor, such +addition of the Contribution causes such combination to be covered by the +Licensed Patents. The patent license shall not apply to any other combinations +which include the Contribution. No hardware per se is licensed hereunder. + +c) Recipient understands that although each Contributor grants the licenses +to its Contributions set forth herein, no assurances are provided by any +Contributor that the Program does not infringe the patent or other intellectual +property rights of any other entity. Each Contributor disclaims any liability to +Recipient for claims brought by any other entity based on infringement of +intellectual property rights or otherwise. 
As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby assumes sole +responsibility to secure any other intellectual property rights needed, if any. +For example, if a third party patent license is required to allow Recipient to +distribute the Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + +d) Each Contributor represents that to its knowledge it has sufficient +copyright rights in its Contribution, if any, to grant the copyright license set +forth in this Agreement. + +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its +own license agreement, provided that: + +a) it complies with the terms and conditions of this Agreement; and + +b) its license agreement: + +i) effectively disclaims on behalf of all Contributors all warranties and +conditions, express and implied, including warranties or conditions of title and +non-infringement, and implied warranties or conditions of merchantability and +fitness for a particular purpose; + +ii) effectively excludes on behalf of all Contributors all liability for +damages, including direct, indirect, special, incidental and consequential +damages, such as lost profits; + +iii) states that any provisions which differ from this Agreement are offered +by that Contributor alone and not by any other party; and + +iv) states that source code for the Program is available from such +Contributor, and informs licensees how to obtain it in a reasonable manner on or +through a medium customarily used for software exchange. + +When the Program is made available in source code form: + +a) it must be made available under this Agreement; and + +b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the +Program. 
+ +Each Contributor must identify itself as the originator of its Contribution, if +any, in a manner that reasonably allows subsequent Recipients to identify the +originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with +respect to end users, business partners and the like. While this license is +intended to facilitate the commercial use of the Program, the Contributor who +includes the Program in a commercial product offering should do so in a manner +which does not create potential liability for other Contributors. Therefore, if +a Contributor includes the Program in a commercial product offering, such +Contributor ("Commercial Contributor") hereby agrees to defend and indemnify +every other Contributor ("Indemnified Contributor") against any losses, damages +and costs (collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to the +extent caused by the acts or omissions of such Commercial Contributor in +connection with its distribution of the Program in a commercial product +offering. The obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. In order +to qualify, an Indemnified Contributor must: a) promptly notify the Commercial +Contributor in writing of such claim, and b) allow the Commercial Contributor to +control, and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may participate in +any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial product +offering, Product X. That Contributor is then a Commercial Contributor. 
If that +Commercial Contributor then makes performance claims, or offers warranties +related to Product X, those performance claims and warranties are such +Commercial Contributor's responsibility alone. Under this section, the +Commercial Contributor would have to defend claims against the other +Contributors related to those performance claims and warranties, and if a court +requires any other Contributor to pay any damages as a result, the Commercial +Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each +Recipient is solely responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its exercise of +rights under this Agreement, including but not limited to the risks and costs of +program errors, compliance with applicable laws, damage to or loss of data, +programs or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY +CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS +GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable +law, it shall not affect the validity or enforceability of the remainder of the +terms of this Agreement, and without further action by the parties hereto, such +provision shall be reformed to the minimum extent necessary to make such +provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with respect to +a patent applicable to software (including a cross-claim or counterclaim in a +lawsuit), then any patent licenses granted by that Contributor to such Recipient +under this Agreement shall terminate as of the date such litigation is filed. In +addition, if Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or hardware) +infringes such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to +comply with any of the material terms or conditions of this Agreement and does +not cure such failure in a reasonable period of time after becoming aware of +such noncompliance. If all Recipient's rights under this Agreement terminate, +Recipient agrees to cease use and distribution of the Program as soon as +reasonably practicable. However, Recipient's obligations under this Agreement +and any licenses granted by Recipient relating to the Program shall continue and +survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in +order to avoid inconsistency the Agreement is copyrighted and may only be +modified in the following manner. The Agreement Steward reserves the right to +publish new versions (including revisions) of this Agreement from time to time. 
+No one other than the Agreement Steward has the right to modify this Agreement. +IBM is the initial Agreement Steward. IBM may assign the responsibility to serve +as the Agreement Steward to a suitable separate entity. Each new version of the +Agreement will be given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the Agreement +under which it was received. In addition, after a new version of the Agreement +is published, Contributor may elect to distribute the Program (including its +Contributions) under the new version. Except as expressly stated in Sections +2(a) and 2(b) above, Recipient receives no rights or licenses to the +intellectual property of any Contributor under this Agreement, whether +expressly, by implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to this +Agreement will bring a legal action under this Agreement more than one year +after the cause of action arose. Each party waives its rights to a jury trial in +any resulting litigation. 
+ +========================================================================== +The following license applies to the mockito library +-------------------------------------------------------------------------- + +The MIT License + +Copyright (c) 2007 Mockito contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +========================================================================== +The following license applies to the hamcrest library +-------------------------------------------------------------------------- + +The BSD 2-Clause License + +Copyright (c) 2000-2006, www.hamcrest.org +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/solr/licenses/mrunit-NOTICE.txt b/solr/licenses/mrunit-NOTICE.txt new file mode 100644 index 00000000000..2dfba62fbfc --- /dev/null +++ b/solr/licenses/mrunit-NOTICE.txt @@ -0,0 +1,5 @@ +Apache MRUnit +Copyright 2011-2012 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
\ No newline at end of file diff --git a/solr/licenses/netty-3.6.2.Final.jar.sha1 b/solr/licenses/netty-3.6.2.Final.jar.sha1 new file mode 100644 index 00000000000..10c334de1a8 --- /dev/null +++ b/solr/licenses/netty-3.6.2.Final.jar.sha1 @@ -0,0 +1 @@ +69be11c61427f0604a30539755add84ad9e37e5e diff --git a/solr/licenses/netty-LICENSE-ASL.txt b/solr/licenses/netty-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/solr/licenses/netty-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/netty-NOTICE.txt b/solr/licenses/netty-NOTICE.txt new file mode 100644 index 00000000000..ef811d15d03 --- /dev/null +++ b/solr/licenses/netty-NOTICE.txt @@ -0,0 +1,121 @@ + + The Netty Project + ================= + +Please visit the Netty web site for more information: + + * http://netty.io/ + +Copyright 2011 The Netty Project + +The Netty Project licenses this file to you under the Apache License, +version 2.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. + +Also, please refer to each LICENSE.<component>.txt file, which is located in +the 'license' directory of the distribution file, for the license terms of the +components that this product depends on. + +------------------------------------------------------------------------------- +This product contains the extensions to Java Collections Framework which has +been derived from the works by JSR-166 EG, Doug Lea, and Jason T.
Greene: + + * LICENSE: + * license/LICENSE.jsr166y.txt (Public Domain) + * HOMEPAGE: + * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ + * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ + +This product contains a modified version of Robert Harder's Public Domain +Base64 Encoder and Decoder, which can be obtained at: + + * LICENSE: + * license/LICENSE.base64.txt (Public Domain) + * HOMEPAGE: + * http://iharder.sourceforge.net/current/java/base64/ + +This product contains a modified portion of 'Webbit', an event based +WebSocket and HTTP server, which can be obtained at: + + * LICENSE: + * license/LICENSE.webbit.txt (BSD License) + * HOMEPAGE: + * https://github.com/joewalnes/webbit + +This product contains a modified portion of 'Caliper', Google's micro- +benchmarking framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.caliper.txt (Apache License 2.0) + * HOMEPAGE: + * http://code.google.com/p/caliper/ + +This product contains a modified portion of 'SLF4J', a simple logging +facade for Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.slf4j.txt (MIT License) + * HOMEPAGE: + * http://www.slf4j.org/ + +This product contains a modified portion of 'ArrayDeque', written by Josh +Bloch of Google, Inc: + + * LICENSE: + * license/LICENSE.deque.txt (Public Domain) + +This product optionally depends on 'JZlib', a re-implementation of zlib in +pure Java, which can be obtained at: + + * LICENSE: + * license/LICENSE.jzlib.txt (BSD style License) + * HOMEPAGE: + * http://www.jcraft.com/jzlib/ + +This product optionally depends on 'Protocol Buffers', Google's data +interchange format, which can be obtained at: + + * LICENSE: + * license/LICENSE.protobuf.txt (New BSD License) + * HOMEPAGE: + * http://code.google.com/p/protobuf/ + +This product optionally depends on 'JBoss Marshalling', an alternative Java +serialization API, which can be obtained at: + + * LICENSE: + * license/LICENSE.jboss-marshalling.txt (GNU 
LGPL 2.1) + * HOMEPAGE: + * http://www.jboss.org/jbossmarshalling + +This product optionally depends on 'Apache Commons Logging', a logging +framework, which can be obtained at: + + * LICENSE: + * license/LICENSE.commons-logging.txt (Apache License 2.0) + * HOMEPAGE: + * http://commons.apache.org/logging/ + +This product optionally depends on 'Apache Log4J', a logging framework, which +can be obtained at: + + * LICENSE: + * license/LICENSE.log4j.txt (Apache License 2.0) + * HOMEPAGE: + * http://logging.apache.org/log4j/ + +This product optionally depends on 'Snappy', a compression library produced +by Google Inc, which can be obtained at: + + * LICENSE: + * license/LICENSE.snappy.txt (New BSD License) + * HOMEPAGE: + * http://code.google.com/p/snappy/ diff --git a/solr/licenses/paranamer-2.3.jar.sha1 b/solr/licenses/paranamer-2.3.jar.sha1 new file mode 100644 index 00000000000..21c0b2636d3 --- /dev/null +++ b/solr/licenses/paranamer-2.3.jar.sha1 @@ -0,0 +1 @@ +4a85963a752c0a2f715c3924bfc686865e7e1bc6 diff --git a/solr/licenses/paranamer-LICENSE-BSD.txt b/solr/licenses/paranamer-LICENSE-BSD.txt new file mode 100644 index 00000000000..fca18473ba0 --- /dev/null +++ b/solr/licenses/paranamer-LICENSE-BSD.txt @@ -0,0 +1,28 @@ +[ ParaNamer used to be 'Public Domain', but since it includes a small piece of ASM it is now the same license as that: BSD ] + + Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3.
Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/solr/licenses/paranamer-NOTICE.txt b/solr/licenses/paranamer-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/snappy-java-1.0.4.1.jar.sha1 b/solr/licenses/snappy-java-1.0.4.1.jar.sha1 new file mode 100644 index 00000000000..b74def86e17 --- /dev/null +++ b/solr/licenses/snappy-java-1.0.4.1.jar.sha1 @@ -0,0 +1 @@ +f88b89a5a21a466aeb0ecf0c063605bd584b4947 diff --git a/solr/licenses/snappy-java-LICENSE-ASL.txt b/solr/licenses/snappy-java-LICENSE-ASL.txt new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/solr/licenses/snappy-java-LICENSE-ASL.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/snappy-java-NOTICE.txt b/solr/licenses/snappy-java-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/solr/licenses/tika-xmp-1.4.jar.sha1 b/solr/licenses/tika-xmp-1.4.jar.sha1 new file mode 100644 index 00000000000..e0d756cdb09 --- /dev/null +++ b/solr/licenses/tika-xmp-1.4.jar.sha1 @@ -0,0 +1 @@ +412c97017eb6318e30c47e9a69e51879b20b4dde diff --git a/solr/licenses/tika-xmp-LICENSE-ASL.txt b/solr/licenses/tika-xmp-LICENSE-ASL.txt new file mode 100644 index 00000000000..ca855f4c37c --- /dev/null +++ b/solr/licenses/tika-xmp-LICENSE-ASL.txt @@ -0,0 +1,238 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +APACHE TIKA SUBCOMPONENTS + +Apache Tika includes a number of subcomponents with separate copyright notices +and license terms. Your use of these subcomponents is subject to the terms and +conditions of the following licenses. + +MIME type information from file-4.26.tar.gz (http://www.darwinsys.com/file/) + + Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. + Software written by Ian F. Darwin and others; + maintained 1994- Christos Zoulas. + + This software is not subject to any export provision of the United States + Department of Commerce, and may be exported to any country or planet. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. 
Redistributions of source code must retain the above copyright + notice immediately at the beginning of the file, without modification, + this list of conditions, and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. diff --git a/solr/licenses/tika-xmp-NOTICE.txt b/solr/licenses/tika-xmp-NOTICE.txt new file mode 100644 index 00000000000..156a582d5b3 --- /dev/null +++ b/solr/licenses/tika-xmp-NOTICE.txt @@ -0,0 +1,15 @@ +Apache Tika xmp +Copyright 2011 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata +This software contains code derived from UCAR/Unidata's NetCDF library. + +Tika-server component uses CDDL-licensed dependencies: jersey (http://jersey.java.net/) and +Grizzly (http://grizzly.java.net/) + +OpenCSV: Copyright 2005 Bytecode Pty Ltd.
Licensed under the Apache License, Version 2.0 + +IPTC Photo Metadata descriptions Copyright 2010 International Press Telecommunications Council. diff --git a/solr/licenses/xmpcore-5.1.2.jar.sha1 b/solr/licenses/xmpcore-5.1.2.jar.sha1 new file mode 100644 index 00000000000..19af7ca17ff --- /dev/null +++ b/solr/licenses/xmpcore-5.1.2.jar.sha1 @@ -0,0 +1 @@ +55615fa2582424e38705487d1d3969af8554f637 diff --git a/solr/licenses/xmpcore-LICENSE-BSD.txt b/solr/licenses/xmpcore-LICENSE-BSD.txt new file mode 100644 index 00000000000..f0296f1c4c6 --- /dev/null +++ b/solr/licenses/xmpcore-LICENSE-BSD.txt @@ -0,0 +1,11 @@ +Copyright (c) 2009, Adobe Systems Incorporated All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +* Neither the name of Adobe Systems Incorporated, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/solr/licenses/xmpcore-NOTICE.txt b/solr/licenses/xmpcore-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d