diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index 8996d7388d1..1e05fcf8668 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -407,12 +407,12 @@ org.carrot2 carrot2-mini - 3.6.2 + 3.8.0 org.carrot2 morfologik-polish - 1.6.0 + 1.7.1 org.codehaus.woodstox diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 489f79fa77c..8413b25ebb4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,10 +23,6 @@ Changes in backwards compatibility policy not positioned. This change affects all classes that inherit from DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand) -* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter - no longer support multiple "dictionaries" as there is only one dictionary available. - (Dawid Weiss) - * LUCENE-5127: Reduce RAM usage of FixedGapTermsIndex. Remove IndexWriterConfig.setTermIndexInterval, IndexWriterConfig.setReaderTermsIndexDivisor, and termsIndexDivisor from StandardDirectoryReader. These options have been no-ops @@ -39,10 +35,6 @@ New Features * LUCENE-4747: Move to Java 7 as minimum Java version. (Robert Muir, Uwe Schindler) -* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter - no longer support multiple "dictionaries" as there is only one dictionary available. - (Dawid Weiss) - * SOLR-3359: Added analyzer attribute/property to SynonymFilterFactory. (Ryo Onodera via Koji Sekiguchi) @@ -184,6 +176,10 @@ Changes in backwards compatibility policy CheckIndex.fixIndex(Status). If you used to pass a codec to this method, just remove it from the arguments. (Adrien Grand) +* LUCENE-5089, SOLR-5126: Update to Morfologik 1.7.1. MorfologikAnalyzer and MorfologikFilter + no longer support multiple "dictionaries" as there is only one dictionary available. + (Dawid Weiss) + ======================= Lucene 4.4.0 ======================= Changes in backwards compatibility policy diff --git a/lucene/analysis/morfologik/ivy.xml b/lucene/analysis/morfologik/ivy.xml index 0c9c3373934..c4dd72fc7e8 100644 --- a/lucene/analysis/morfologik/ivy.xml +++ b/lucene/analysis/morfologik/ivy.xml @@ -19,9 +19,9 @@ - - - + + + diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java index 049dad1570e..5ac14cdeb55 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.morfologik; import java.io.IOException; import java.util.*; +import java.util.regex.Pattern; import morfologik.stemming.*; @@ -82,73 +83,31 @@ public class MorfologikFilter extends TokenFilter { } /** - * The tag encoding format has been changing in Morfologik from version - * to version. Let's keep both variants and determine which one to run - * based on this flag. + * A pattern used to split lemma forms. */ - private final static boolean multipleTagsPerLemma = true; + private final static Pattern lemmaSplitter = Pattern.compile("\\+|\\|"); private void popNextLemma() { - if (multipleTagsPerLemma) { - // One tag (concatenated) per lemma. - final WordData lemma = lemmaList.get(lemmaListIndex++); - termAtt.setEmpty().append(lemma.getStem()); - CharSequence tag = lemma.getTag(); - if (tag != null) { - String[] tags = tag.toString().split("\\+|\\|"); - for (int i = 0; i < tags.length; i++) { - if (tagsList.size() <= i) { - tagsList.add(new StringBuilder()); - } - StringBuilder buffer = tagsList.get(i); - buffer.setLength(0); - buffer.append(tags[i]); + // One tag (concatenated) per lemma. + final WordData lemma = lemmaList.get(lemmaListIndex++); + termAtt.setEmpty().append(lemma.getStem()); + CharSequence tag = lemma.getTag(); + if (tag != null) { + String[] tags = lemmaSplitter.split(tag.toString()); + for (int i = 0; i < tags.length; i++) { + if (tagsList.size() <= i) { + tagsList.add(new StringBuilder()); } - tagsAtt.setTags(tagsList.subList(0, tags.length)); - } else { - tagsAtt.setTags(Collections. emptyList()); + StringBuilder buffer = tagsList.get(i); + buffer.setLength(0); + buffer.append(tags[i]); } + tagsAtt.setTags(tagsList.subList(0, tags.length)); } else { - // One tag (concatenated) per stem (lemma repeated). - CharSequence currentStem; - int tags = 0; - do { - final WordData lemma = lemmaList.get(lemmaListIndex++); - currentStem = lemma.getStem(); - final CharSequence tag = lemma.getTag(); - if (tag != null) { - if (tagsList.size() <= tags) { - tagsList.add(new StringBuilder()); - } - - final StringBuilder buffer = tagsList.get(tags++); - buffer.setLength(0); - buffer.append(lemma.getTag()); - } - } while (lemmaListIndex < lemmaList.size() && - equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem)); - - // Set the lemma's base form and tags as attributes. - termAtt.setEmpty().append(currentStem); - tagsAtt.setTags(tagsList.subList(0, tags)); + tagsAtt.setTags(Collections. emptyList()); } } - /** - * Compare two char sequences for equality. Assumes non-null arguments. - */ - private static final boolean equalCharSequences(CharSequence s1, CharSequence s2) { - int len1 = s1.length(); - int len2 = s2.length(); - if (len1 != len2) return false; - for (int i = len1; --i >= 0;) { - if (s1.charAt(i) != s2.charAt(i)) { - return false; - } - } - return true; - } - /** * Lookup a given surface form of a token and update * {@link #lemmaList} and {@link #lemmaListIndex} accordingly. diff --git a/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 b/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 deleted file mode 100644 index 8041cb4027d..00000000000 --- a/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -397a99307020797e6790f2faf8cf865983b52559 diff --git a/lucene/licenses/morfologik-fsa-1.7.1.jar.sha1 b/lucene/licenses/morfologik-fsa-1.7.1.jar.sha1 new file mode 100644 index 00000000000..b71174e4d03 --- /dev/null +++ b/lucene/licenses/morfologik-fsa-1.7.1.jar.sha1 @@ -0,0 +1 @@ +fdf556c88d66f65440bd24024f55a52c227c0e3f diff --git a/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 b/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 deleted file mode 100644 index b44ead1078f..00000000000 --- a/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ca0663530971b54420fc1cea00a6338f68428232 diff --git a/lucene/licenses/morfologik-polish-1.7.1.jar.sha1 b/lucene/licenses/morfologik-polish-1.7.1.jar.sha1 new file mode 100644 index 00000000000..3bd0d88ae67 --- /dev/null +++ b/lucene/licenses/morfologik-polish-1.7.1.jar.sha1 @@ -0,0 +1 @@ +e03b9feb39f6e2c0ac7c37e220d01cdae66d3a28 diff --git a/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 b/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 deleted file mode 100644 index 4ba54674892..00000000000 --- a/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8a284571bea2cdd305cd86fbac9bab6deef31c7f diff --git a/lucene/licenses/morfologik-stemming-1.7.1.jar.sha1 b/lucene/licenses/morfologik-stemming-1.7.1.jar.sha1 new file mode 100644 index 00000000000..3b53503b1c2 --- /dev/null +++ b/lucene/licenses/morfologik-stemming-1.7.1.jar.sha1 @@ -0,0 +1 @@ +c81d6c63e22e97819063cad7f1ecd20269cba720 diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ffe123ce614..4fccdf2703a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -25,7 +25,7 @@ $Id$ Versions of Major Components --------------------- Apache Tika 1.4 -Carrot2 3.6.2 +Carrot2 3.8.0 Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.5 @@ -53,7 +53,7 @@ Other Changes Versions of Major Components --------------------- Apache Tika 1.4 -Carrot2 3.6.2 +Carrot2 3.8.0 Velocity 1.7 and Velocity Tools 2.0 Apache UIMA 2.3.1 Apache ZooKeeper 3.4.5 @@ -76,6 +76,9 @@ Detailed Change List New Features ---------------------- +* SOLR-5126: Update Carrot2 clustering to version 3.8.0, update Morfologik + to version 1.7.1 (Dawid Weiss) + * SOLR-2345: Enhanced geodist() to work with an RPT field, provided that the field is referenced via 'sfield' and the query point is constant. (David Smiley) diff --git a/solr/contrib/analysis-extras/ivy.xml b/solr/contrib/analysis-extras/ivy.xml index 597f6060b4b..b8a1bfb7e36 100644 --- a/solr/contrib/analysis-extras/ivy.xml +++ b/solr/contrib/analysis-extras/ivy.xml @@ -20,9 +20,9 @@ - - - + + + diff --git a/solr/contrib/clustering/ivy.xml b/solr/contrib/clustering/ivy.xml index ef3e4de21da..71f8c8c7fc9 100644 --- a/solr/contrib/clustering/ivy.xml +++ b/solr/contrib/clustering/ivy.xml @@ -19,14 +19,25 @@ - - - + + + + + + + + + - - - + + + diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java index 2926b8fa643..b52e9a18a97 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java @@ -37,8 +37,7 @@ public abstract class DocumentClusteringEngine extends ClusteringEngine { public abstract NamedList cluster(SolrParams solrParams); /** - * Experimental. Subject to change before the next release - * + * Experimental. Subject to change before the next release * * Cluster the set of docs. Clustering of documents is often an expensive task that can take a long time. * @param docs The docs to cluster. If null, cluster all docs as in {@link #cluster(org.apache.solr.common.params.SolrParams)} diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index 7ac385c87ac..e788a3cedfb 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -77,6 +77,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Closeables; +import com.google.common.io.Closer; /** * Search results clustering engine based on Carrot2 clustering algorithms. @@ -140,7 +141,13 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { + ". Using the default " + resource + " from Carrot JAR."); return new IResource[] {}; } finally { - if (resourceStream != null) Closeables.closeQuietly(resourceStream); + if (resourceStream != null) { + try { + resourceStream.close(); + } catch (IOException e) { + // ignore. + } + } } log.info("Loaded Solr resource: " + resourceName); diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java index 17029019a88..d202042bc68 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java @@ -52,7 +52,7 @@ public class ClusteringComponentTest extends AbstractClusteringTestCase { SolrRequestHandler handler = core.getRequestHandler("standard"); SolrQueryResponse rsp; rsp = new SolrQueryResponse(); - rsp.add("responseHeader", new SimpleOrderedMap()); + rsp.add("responseHeader", new SimpleOrderedMap()); SolrQueryRequest req = new LocalSolrQueryRequest(core, params); handler.handleRequest(req, rsp); NamedList values = rsp.getValues(); @@ -70,7 +70,7 @@ public class ClusteringComponentTest extends AbstractClusteringTestCase { handler = core.getRequestHandler("docClustering"); rsp = new SolrQueryResponse(); - rsp.add("responseHeader", new SimpleOrderedMap()); + rsp.add("responseHeader", new SimpleOrderedMap()); req = new LocalSolrQueryRequest(core, params); handler.handleRequest(req, rsp); values = rsp.getValues(); diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java index 9bf68ae8cb5..2b113b7bbfb 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java @@ -15,7 +15,6 @@ package org.apache.solr.handler.clustering.carrot2; * See the License for the specific language governing permissions and * limitations under the License. */ -import java.util.Collections; import java.util.List; import org.carrot2.core.Cluster; diff --git a/solr/licenses/attributes-binder-1.0.1.jar.sha1 b/solr/licenses/attributes-binder-1.0.1.jar.sha1 deleted file mode 100644 index 2b260866210..00000000000 --- a/solr/licenses/attributes-binder-1.0.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -698f0c9427a8a94e00a59575ff6c5ff9d0bdc34a diff --git a/solr/licenses/attributes-binder-1.2.0.jar.sha1 b/solr/licenses/attributes-binder-1.2.0.jar.sha1 new file mode 100644 index 00000000000..16318388090 --- /dev/null +++ b/solr/licenses/attributes-binder-1.2.0.jar.sha1 @@ -0,0 +1 @@ +2aa3ce620ebadea4e385fc0a54dc363cb659dca5 diff --git a/solr/licenses/carrot2-mini-3.6.2.jar.sha1 b/solr/licenses/carrot2-mini-3.6.2.jar.sha1 deleted file mode 100644 index ccdccc48abc..00000000000 --- a/solr/licenses/carrot2-mini-3.6.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ffd6e0f7ef6c189bf8b456ef749f1ce600d6df74 diff --git a/solr/licenses/carrot2-mini-3.8.0.jar.sha1 b/solr/licenses/carrot2-mini-3.8.0.jar.sha1 new file mode 100644 index 00000000000..c190593a19a --- /dev/null +++ b/solr/licenses/carrot2-mini-3.8.0.jar.sha1 @@ -0,0 +1 @@ +65d7bbe49bad0a95d9ae9b858abafb96a666ac5a diff --git a/solr/licenses/hppc-0.4.1.jar.sha1 b/solr/licenses/hppc-0.4.1.jar.sha1 deleted file mode 100644 index c7362d0efac..00000000000 --- a/solr/licenses/hppc-0.4.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -61497cafe8201435b603c6014d2abf0b3fb7c381 diff --git a/solr/licenses/hppc-0.5.2.jar.sha1 b/solr/licenses/hppc-0.5.2.jar.sha1 new file mode 100644 index 00000000000..0557fa15a0e --- /dev/null +++ b/solr/licenses/hppc-0.5.2.jar.sha1 @@ -0,0 +1 @@ +074bcc9d152a928a4ea9ac59a5b45850bf00cd4e diff --git a/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 b/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 deleted file mode 100644 index 8041cb4027d..00000000000 --- a/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -397a99307020797e6790f2faf8cf865983b52559 diff --git a/solr/licenses/morfologik-fsa-1.7.1.jar.sha1 b/solr/licenses/morfologik-fsa-1.7.1.jar.sha1 new file mode 100644 index 00000000000..b71174e4d03 --- /dev/null +++ b/solr/licenses/morfologik-fsa-1.7.1.jar.sha1 @@ -0,0 +1 @@ +fdf556c88d66f65440bd24024f55a52c227c0e3f diff --git a/solr/licenses/morfologik-polish-1.6.0.jar.sha1 b/solr/licenses/morfologik-polish-1.6.0.jar.sha1 deleted file mode 100644 index b44ead1078f..00000000000 --- a/solr/licenses/morfologik-polish-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ca0663530971b54420fc1cea00a6338f68428232 diff --git a/solr/licenses/morfologik-polish-1.7.1.jar.sha1 b/solr/licenses/morfologik-polish-1.7.1.jar.sha1 new file mode 100644 index 00000000000..3bd0d88ae67 --- /dev/null +++ b/solr/licenses/morfologik-polish-1.7.1.jar.sha1 @@ -0,0 +1 @@ +e03b9feb39f6e2c0ac7c37e220d01cdae66d3a28 diff --git a/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 b/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 deleted file mode 100644 index 4ba54674892..00000000000 --- a/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8a284571bea2cdd305cd86fbac9bab6deef31c7f diff --git a/solr/licenses/morfologik-stemming-1.7.1.jar.sha1 b/solr/licenses/morfologik-stemming-1.7.1.jar.sha1 new file mode 100644 index 00000000000..3b53503b1c2 --- /dev/null +++ b/solr/licenses/morfologik-stemming-1.7.1.jar.sha1 @@ -0,0 +1 @@ +c81d6c63e22e97819063cad7f1ecd20269cba720 diff --git a/solr/licenses/simple-xml-2.6.4.jar.sha1 b/solr/licenses/simple-xml-2.6.4.jar.sha1 deleted file mode 100644 index ceb36f3e525..00000000000 --- a/solr/licenses/simple-xml-2.6.4.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5b7a462882768cf65a2273d90710c9838bd5b280 diff --git a/solr/licenses/simple-xml-2.7.jar.sha1 b/solr/licenses/simple-xml-2.7.jar.sha1 new file mode 100644 index 00000000000..117f50f793e --- /dev/null +++ b/solr/licenses/simple-xml-2.7.jar.sha1 @@ -0,0 +1 @@ +48f90a787b2d59faab3b8c203945e4b0db32aec4