SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0.

This commit is contained in:
Dawid Weiss 2016-11-04 16:02:00 +01:00
parent 1381dd9287
commit 401d77485d
14 changed files with 77 additions and 82 deletions

View File

@ -222,7 +222,7 @@ org.bouncycastle.version = 1.45
/org.carrot2.attributes/attributes-binder = 1.3.1 /org.carrot2.attributes/attributes-binder = 1.3.1
/org.carrot2.shaded/carrot2-guava = 18.0 /org.carrot2.shaded/carrot2-guava = 18.0
/org.carrot2/carrot2-mini = 3.12.0 /org.carrot2/carrot2-mini = 3.15.0
org.carrot2.morfologik.version = 2.1.1 org.carrot2.morfologik.version = 2.1.1
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version} /org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}

View File

@ -70,7 +70,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this r
Versions of Major Components Versions of Major Components
--------------------- ---------------------
Apache Tika 1.13 Apache Tika 1.13
Carrot2 3.12.0 Carrot2 3.15.0
Velocity 1.7 and Velocity Tools 2.0 Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1 Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6 Apache ZooKeeper 3.4.6
@ -114,6 +114,8 @@ Bug Fixes
Other Changes Other Changes
---------------------- ----------------------
* SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0. (Dawid Weiss)
* SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives. * SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives.
(Michael Braun via David Smiley) (Michael Braun via David Smiley)

View File

@ -19,6 +19,7 @@ package org.apache.solr.handler.clustering;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
@ -44,9 +45,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.collect.Maps;
/** /**
* Provides a plugin for performing cluster analysis. This can either be applied to * Provides a plugin for performing cluster analysis. This can either be applied to
* search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for * search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
@ -68,12 +66,12 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
/** /**
* Declaration-order list of search clustering engines. * Declaration-order list of search clustering engines.
*/ */
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap(); private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = new LinkedHashMap<>();
/** /**
* Declaration order list of document clustering engines. * Declaration order list of document clustering engines.
*/ */
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap(); private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = new LinkedHashMap<>();
/** /**
* An unmodifiable view of {@link #searchClusteringEngines}. * An unmodifiable view of {@link #searchClusteringEngines}.
@ -173,7 +171,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
if (engine != null) { if (engine != null) {
checkAvailable(name, engine); checkAvailable(name, engine);
DocListAndSet results = rb.getResults(); DocListAndSet results = rb.getResults();
Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size()); Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds); results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req); Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);

View File

@ -58,6 +58,8 @@ import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm; import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode; import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.AttributeNames; import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.shaded.guava.common.base.MoreObjects;
import org.carrot2.shaded.guava.common.base.Strings;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor; import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor; import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder; import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
@ -69,12 +71,6 @@ import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/** /**
* Search results clustering engine based on Carrot2 clustering algorithms. * Search results clustering engine based on Carrot2 clustering algorithms.
* *
@ -155,7 +151,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
// of this component. This by-name convention lookup is used to simplify configuring algorithms. // of this component. This by-name convention lookup is used to simplify configuring algorithms.
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME); String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'"); log.info("Initializing Clustering Engine '" +
MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
if (!Strings.isNullOrEmpty(componentName)) { if (!Strings.isNullOrEmpty(componentName)) {
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml"); IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
@ -268,7 +265,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){ protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams(); SolrParams solrParams = sreq.getParams();
HashSet<String> fields = Sets.newHashSet(getFieldsForClustering(sreq)); HashSet<String> fields = new HashSet<>(getFieldsForClustering(sreq));
fields.add(idFieldName); fields.add(idFieldName);
fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url")); fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url"));
fields.addAll(getCustomFieldsMap(solrParams).keySet()); fields.addAll(getCustomFieldsMap(solrParams).keySet());
@ -295,7 +292,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
+ " must not be blank."); + " must not be blank.");
} }
final Set<String> fields = Sets.newHashSet(); final Set<String> fields = new HashSet<>();
fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]"))); fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]")));
fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]"))); fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]")));
return fields; return fields;
@ -319,7 +316,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
Map<String, String> customFields = getCustomFieldsMap(solrParams); Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map // Parse language code map string into a map
Map<String, String> languageCodeMap = Maps.newHashMap(); Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) { if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) { for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":"); final String[] split = pair.split(":");
@ -340,7 +337,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
if (produceSummary) { if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core); highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null){ if (highlighter != null){
Map<String, Object> args = Maps.newHashMap(); Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]"); snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry); args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true"); args.put(HighlightParams.HIGHLIGHT, "true");
@ -466,10 +463,10 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
* custom field names. * custom field names.
*/ */
private Map<String, String> getCustomFieldsMap(SolrParams solrParams) { private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
Map<String, String> customFields = Maps.newHashMap(); Map<String, String> customFields = new HashMap<>();
String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME); String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
if (customFieldsSpec != null) { if (customFieldsSpec != null) {
customFields = Maps.newHashMap(); customFields = new HashMap<>();
for (String customFieldSpec : customFieldsSpec) { for (String customFieldSpec : customFieldsSpec) {
String [] split = customFieldSpec.split(":"); String [] split = customFieldSpec.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) { if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
@ -501,7 +498,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters, private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
SolrParams solrParams) { SolrParams solrParams) {
List<NamedList<Object>> result = Lists.newArrayList(); List<NamedList<Object>> result = new ArrayList<>();
clustersToNamedList(carrotClusters, result, solrParams.getBool( clustersToNamedList(carrotClusters, result, solrParams.getBool(
CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt( CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE)); CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
@ -534,7 +531,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add documents // Add documents
List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments(); List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
List<Object> docList = Lists.newArrayList(); List<Object> docList = new ArrayList<>();
cluster.add("docs", docList); cluster.add("docs", docList);
for (Document doc : docs) { for (Document doc : docs) {
docList.add(doc.getField(SOLR_DOCUMENT_ID)); docList.add(doc.getField(SOLR_DOCUMENT_ID));
@ -542,7 +539,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Add subclusters // Add subclusters
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) { if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) {
List<NamedList<Object>> subclusters = Lists.newArrayList(); List<NamedList<Object>> subclusters = new ArrayList<>();
cluster.add("clusters", subclusters); cluster.add("clusters", subclusters);
clustersToNamedList(outCluster.getSubclusters(), subclusters, clustersToNamedList(outCluster.getSubclusters(), subclusters,
outputSubClusters, maxLabels); outputSubClusters, maxLabels);

View File

@ -16,10 +16,10 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set; import java.util.Set;
import com.google.common.collect.ImmutableSet;
/** /**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration). * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
* @lucene.experimental * @lucene.experimental
@ -50,7 +50,7 @@ public final class CarrotParams {
*/ */
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir"; public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of( static final Set<String> CARROT_PARAM_NAMES = new HashSet<>(Arrays.asList(
ALGORITHM, ALGORITHM,
TITLE_FIELD_NAME, TITLE_FIELD_NAME,
@ -66,8 +66,8 @@ public final class CarrotParams {
NUM_DESCRIPTIONS, NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS, OUTPUT_SUB_CLUSTERS,
RESOURCES_DIR, RESOURCES_DIR,
LANGUAGE_CODE_MAP); LANGUAGE_CODE_MAP));
/** No instances. */ /** No instances. */
private CarrotParams() {} private CarrotParams() {}
} }

View File

@ -16,7 +16,9 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.Collection; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.carrot2.core.LanguageCode; import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Init; import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Processing; import org.carrot2.core.attribute.Processing;
@ -37,9 +40,6 @@ import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable; import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/** /**
* An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop * An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop
* words from a field's StopFilter to the default stop words used in Carrot2, * words from a field's StopFilter to the default stop words used in Carrot2,
@ -67,7 +67,7 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
/** /**
* A lazily-built cache of stop words per field. * A lazily-built cache of stop words per field.
*/ */
private Multimap<String, CharArraySet> solrStopWords = HashMultimap.create(); private HashMap<String, List<CharArraySet>> solrStopWords = new HashMap<>();
/** /**
* Carrot2's default lexical resources to use in addition to Solr's stop * Carrot2's default lexical resources to use in addition to Solr's stop
@ -79,31 +79,34 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
* Obtains stop words for a field from the associated * Obtains stop words for a field from the associated
* {@link StopFilterFactory}, if any. * {@link StopFilterFactory}, if any.
*/ */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) { private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
// No need to synchronize here, Carrot2 ensures that instances // No need to synchronize here, Carrot2 ensures that instances
// of this class are not used by multiple threads at a time. // of this class are not used by multiple threads at a time.
if (!solrStopWords.containsKey(fieldName)) { synchronized (solrStopWords) {
final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName) if (!solrStopWords.containsKey(fieldName)) {
.getIndexAnalyzer(); solrStopWords.put(fieldName, new ArrayList<>());
if (fieldAnalyzer instanceof TokenizerChain) {
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer)
.getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet
solrStopWords.put(fieldName,
((StopFilterFactory) factory).getStopWords());
}
if (factory instanceof CommonGramsFilterFactory) { IndexSchema schema = core.getLatestSchema();
solrStopWords.put(fieldName, final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
((CommonGramsFilterFactory) factory) if (fieldAnalyzer instanceof TokenizerChain) {
.getCommonWords()); final TokenFilterFactory[] filterFactories =
((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet
CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
solrStopWords.get(fieldName).add(stopWords);
}
if (factory instanceof CommonGramsFilterFactory) {
CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
solrStopWords.get(fieldName).add(commonWords);
}
} }
} }
} }
return solrStopWords.get(fieldName);
} }
return solrStopWords.get(fieldName);
} }
@Override @Override

View File

@ -17,6 +17,9 @@
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -45,9 +48,6 @@ import org.carrot2.core.LanguageCode;
import org.carrot2.util.attribute.AttributeUtils; import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test; import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/** /**
* *
*/ */
@ -211,7 +211,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// stoplabels.mt, so we're expecting only one cluster with label "online". // stoplabels.mt, so we're expecting only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine(engineName), 1, params); getClusteringEngine(engineName), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online")); assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
} }
@Test @Test
@ -226,7 +226,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
// only one cluster with label "online". // only one cluster with label "online".
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 1, params); getClusteringEngine("lexical-resource-check"), 1, params);
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online")); assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
} }
@Test @Test
@ -243,9 +243,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
final List<NamedList<Object>> clusters = checkEngine( final List<NamedList<Object>> clusters = checkEngine(
getClusteringEngine("lexical-resource-check"), 2, params); getClusteringEngine("lexical-resource-check"), 2, params);
assertEquals(ImmutableList.of("online"), getLabels(clusters.get(0))); assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
assertEquals(ImmutableList.of("solrownstopword"), assertEquals(Collections.singletonList("solrownstopword"), getLabels(clusters.get(1)));
getLabels(clusters.get(1)));
} }
@Test @Test
@ -395,8 +394,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("stc", "default", "mock"), Arrays.asList("stc", "default", "mock"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
LingoClusteringAlgorithm.class, LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -407,8 +406,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("unavailable", "lingo", "stc", "mock", "default"), Arrays.asList("unavailable", "lingo", "stc", "mock", "default"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
LingoClusteringAlgorithm.class, LingoClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
@ -419,8 +418,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups"); ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp); Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
assertEquals( assertEquals(
Lists.newArrayList("", "default"), Arrays.asList("", "default"),
Lists.newArrayList(engines.keySet())); new ArrayList<>(engines.keySet()));
assertEquals( assertEquals(
MockClusteringAlgorithm.class, MockClusteringAlgorithm.class,
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass()); ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());

View File

@ -15,6 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -29,8 +30,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs input documents as clusters. * A mock Carrot2 clustering algorithm that outputs input documents as clusters.
* Useful only in tests. * Useful only in tests.
@ -56,7 +55,7 @@ public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayListWithCapacity(documents.size()); clusters = new ArrayList<>();
for (Document document : documents) { for (Document document : documents) {
final Cluster cluster = new Cluster(); final Cluster cluster = new Cluster();

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -36,8 +37,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs stem of each token of each * A mock Carrot2 clustering algorithm that outputs stem of each token of each
* document as a separate cluster. Useful only in tests. * document as a separate cluster. Useful only in tests.
@ -64,7 +63,7 @@ public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
final AllTokens allTokens = preprocessingContext.allTokens; final AllTokens allTokens = preprocessingContext.allTokens;
final AllWords allWords = preprocessingContext.allWords; final AllWords allWords = preprocessingContext.allWords;
final AllStems allStems = preprocessingContext.allStems; final AllStems allStems = preprocessingContext.allStems;
clusters = Lists.newArrayListWithCapacity(allTokens.image.length); clusters = new ArrayList<>();
for (int i = 0; i < allTokens.image.length; i++) { for (int i = 0; i < allTokens.image.length; i++) {
if (allTokens.wordIndex[i] >= 0) { if (allTokens.wordIndex[i] >= 0) {
clusters.add(new Cluster(new String( clusters.add(new Cluster(new String(

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -33,7 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock Carrot2 clustering algorithm that outputs each token of each document * A mock Carrot2 clustering algorithm that outputs each token of each document
@ -58,8 +58,7 @@ public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
public void process() throws ProcessingException { public void process() throws ProcessingException {
final PreprocessingContext preprocessingContext = preprocessing.preprocess( final PreprocessingContext preprocessingContext = preprocessing.preprocess(
documents, "", LanguageCode.ENGLISH); documents, "", LanguageCode.ENGLISH);
clusters = Lists clusters = new ArrayList<>();
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
for (char[] token : preprocessingContext.allTokens.image) { for (char[] token : preprocessingContext.allTokens.image) {
if (token != null) { if (token != null) {
clusters.add(new Cluster(new String(token))); clusters.add(new Cluster(new String(token)));

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.carrot2.core.Cluster; import org.carrot2.core.Cluster;
@ -33,8 +34,6 @@ import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output; import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/** /**
* A mock implementation of Carrot2 clustering algorithm for testing whether the * A mock implementation of Carrot2 clustering algorithm for testing whether the
* customized lexical resource lookup works correctly. This algorithm ignores * customized lexical resource lookup works correctly. This algorithm ignores
@ -60,7 +59,7 @@ public class LexicalResourcesCheckClusteringAlgorithm extends
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayList(); clusters = new ArrayList<>();
if (wordsToCheck == null) { if (wordsToCheck == null) {
return; return;
} }

View File

@ -15,13 +15,13 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.handler.clustering.carrot2; package org.apache.solr.handler.clustering.carrot2;
import com.google.common.collect.Lists;
import org.carrot2.core.*; import org.carrot2.core.*;
import org.carrot2.core.attribute.AttributeNames; import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing; import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*; import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange; import org.carrot2.util.attribute.constraint.IntRange;
import java.util.ArrayList;
import java.util.List; import java.util.List;
@Bindable(prefix = "MockClusteringAlgorithm") @Bindable(prefix = "MockClusteringAlgorithm")
@ -62,7 +62,7 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
@Override @Override
public void process() throws ProcessingException { public void process() throws ProcessingException {
clusters = Lists.newArrayList(); clusters = new ArrayList<>();
if (documents == null) { if (documents == null) {
return; return;
} }

View File

@ -1 +0,0 @@
9d8b42afe43ba5c0a0c5d67208d5c919e45c3584

View File

@ -0,0 +1 @@
5d76ec388711056bfaaacc354ed04ffa6811c7b7