mirror of https://github.com/apache/lucene.git
SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0.
This commit is contained in:
parent
1381dd9287
commit
401d77485d
|
@ -222,7 +222,7 @@ org.bouncycastle.version = 1.45
|
|||
/org.carrot2.attributes/attributes-binder = 1.3.1
|
||||
/org.carrot2.shaded/carrot2-guava = 18.0
|
||||
|
||||
/org.carrot2/carrot2-mini = 3.12.0
|
||||
/org.carrot2/carrot2-mini = 3.15.0
|
||||
|
||||
org.carrot2.morfologik.version = 2.1.1
|
||||
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
|
||||
|
|
|
@ -70,7 +70,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this r
|
|||
Versions of Major Components
|
||||
---------------------
|
||||
Apache Tika 1.13
|
||||
Carrot2 3.12.0
|
||||
Carrot2 3.15.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.6
|
||||
|
@ -114,6 +114,8 @@ Bug Fixes
|
|||
Other Changes
|
||||
----------------------
|
||||
|
||||
* SOLR-7539: Upgrade the clustering plugin to Carrot2 3.15.0. (Dawid Weiss)
|
||||
|
||||
* SOLR-9621: Remove several Guava & Apache Commons calls in favor of java 8 alternatives.
|
||||
(Michael Braun via David Smiley)
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.handler.clustering;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
@ -44,9 +45,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
|
||||
/**
|
||||
* Provides a plugin for performing cluster analysis. This can either be applied to
|
||||
* search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
|
||||
|
@ -68,12 +66,12 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
/**
|
||||
* Declaration-order list of search clustering engines.
|
||||
*/
|
||||
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap();
|
||||
private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* Declaration order list of document clustering engines.
|
||||
*/
|
||||
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap();
|
||||
private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = new LinkedHashMap<>();
|
||||
|
||||
/**
|
||||
* An unmodifiable view of {@link #searchClusteringEngines}.
|
||||
|
@ -173,7 +171,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
if (engine != null) {
|
||||
checkAvailable(name, engine);
|
||||
DocListAndSet results = rb.getResults();
|
||||
Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
|
||||
Map<SolrDocument,Integer> docIds = new HashMap<>(results.docList.size());
|
||||
SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
|
||||
results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
|
||||
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
|
||||
|
|
|
@ -58,6 +58,8 @@ import org.carrot2.core.Document;
|
|||
import org.carrot2.core.IClusteringAlgorithm;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.shaded.guava.common.base.MoreObjects;
|
||||
import org.carrot2.shaded.guava.common.base.Strings;
|
||||
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
|
||||
|
@ -69,12 +71,6 @@ import org.carrot2.util.resource.ResourceLookup;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
/**
|
||||
* Search results clustering engine based on Carrot2 clustering algorithms.
|
||||
*
|
||||
|
@ -155,7 +151,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
|
||||
// of this component. This by-name convention lookup is used to simplify configuring algorithms.
|
||||
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
|
||||
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
|
||||
log.info("Initializing Clustering Engine '" +
|
||||
MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
|
||||
|
||||
if (!Strings.isNullOrEmpty(componentName)) {
|
||||
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
|
||||
|
@ -268,7 +265,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
|
||||
SolrParams solrParams = sreq.getParams();
|
||||
|
||||
HashSet<String> fields = Sets.newHashSet(getFieldsForClustering(sreq));
|
||||
HashSet<String> fields = new HashSet<>(getFieldsForClustering(sreq));
|
||||
fields.add(idFieldName);
|
||||
fields.add(solrParams.get(CarrotParams.URL_FIELD_NAME, "url"));
|
||||
fields.addAll(getCustomFieldsMap(solrParams).keySet());
|
||||
|
@ -295,7 +292,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
+ " must not be blank.");
|
||||
}
|
||||
|
||||
final Set<String> fields = Sets.newHashSet();
|
||||
final Set<String> fields = new HashSet<>();
|
||||
fields.addAll(Arrays.asList(titleFieldSpec.split("[, ]")));
|
||||
fields.addAll(Arrays.asList(snippetFieldSpec.split("[, ]")));
|
||||
return fields;
|
||||
|
@ -319,7 +316,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
Map<String, String> customFields = getCustomFieldsMap(solrParams);
|
||||
|
||||
// Parse language code map string into a map
|
||||
Map<String, String> languageCodeMap = Maps.newHashMap();
|
||||
Map<String, String> languageCodeMap = new HashMap<>();
|
||||
if (StringUtils.isNotBlank(languageField)) {
|
||||
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
|
||||
final String[] split = pair.split(":");
|
||||
|
@ -340,7 +337,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
if (produceSummary) {
|
||||
highlighter = HighlightComponent.getHighlighter(core);
|
||||
if (highlighter != null){
|
||||
Map<String, Object> args = Maps.newHashMap();
|
||||
Map<String, Object> args = new HashMap<>();
|
||||
snippetFieldAry = snippetFieldSpec.split("[, ]");
|
||||
args.put(HighlightParams.FIELDS, snippetFieldAry);
|
||||
args.put(HighlightParams.HIGHLIGHT, "true");
|
||||
|
@ -466,10 +463,10 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
* custom field names.
|
||||
*/
|
||||
private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
|
||||
Map<String, String> customFields = Maps.newHashMap();
|
||||
Map<String, String> customFields = new HashMap<>();
|
||||
String [] customFieldsSpec = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
|
||||
if (customFieldsSpec != null) {
|
||||
customFields = Maps.newHashMap();
|
||||
customFields = new HashMap<>();
|
||||
for (String customFieldSpec : customFieldsSpec) {
|
||||
String [] split = customFieldSpec.split(":");
|
||||
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
|
||||
|
@ -501,7 +498,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
|
||||
SolrParams solrParams) {
|
||||
List<NamedList<Object>> result = Lists.newArrayList();
|
||||
List<NamedList<Object>> result = new ArrayList<>();
|
||||
clustersToNamedList(carrotClusters, result, solrParams.getBool(
|
||||
CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
|
||||
CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
|
||||
|
@ -534,7 +531,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
// Add documents
|
||||
List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
|
||||
List<Object> docList = Lists.newArrayList();
|
||||
List<Object> docList = new ArrayList<>();
|
||||
cluster.add("docs", docList);
|
||||
for (Document doc : docs) {
|
||||
docList.add(doc.getField(SOLR_DOCUMENT_ID));
|
||||
|
@ -542,7 +539,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
// Add subclusters
|
||||
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) {
|
||||
List<NamedList<Object>> subclusters = Lists.newArrayList();
|
||||
List<NamedList<Object>> subclusters = new ArrayList<>();
|
||||
cluster.add("clusters", subclusters);
|
||||
clustersToNamedList(outCluster.getSubclusters(), subclusters,
|
||||
outputSubClusters, maxLabels);
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
/**
|
||||
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
|
||||
* @lucene.experimental
|
||||
|
@ -50,7 +50,7 @@ public final class CarrotParams {
|
|||
*/
|
||||
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
|
||||
|
||||
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
static final Set<String> CARROT_PARAM_NAMES = new HashSet<>(Arrays.asList(
|
||||
ALGORITHM,
|
||||
|
||||
TITLE_FIELD_NAME,
|
||||
|
@ -66,7 +66,7 @@ public final class CarrotParams {
|
|||
NUM_DESCRIPTIONS,
|
||||
OUTPUT_SUB_CLUSTERS,
|
||||
RESOURCES_DIR,
|
||||
LANGUAGE_CODE_MAP);
|
||||
LANGUAGE_CODE_MAP));
|
||||
|
||||
/** No instances. */
|
||||
private CarrotParams() {}
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.core.StopFilterFactory;
|
|||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.attribute.Init;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
|
@ -37,9 +40,6 @@ import org.carrot2.util.attribute.Attribute;
|
|||
import org.carrot2.util.attribute.Bindable;
|
||||
import org.carrot2.util.attribute.Input;
|
||||
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Multimap;
|
||||
|
||||
/**
|
||||
* An implementation of Carrot2's {@link ILexicalDataFactory} that adds stop
|
||||
* words from a field's StopFilter to the default stop words used in Carrot2,
|
||||
|
@ -67,7 +67,7 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
|
|||
/**
|
||||
* A lazily-built cache of stop words per field.
|
||||
*/
|
||||
private Multimap<String, CharArraySet> solrStopWords = HashMultimap.create();
|
||||
private HashMap<String, List<CharArraySet>> solrStopWords = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Carrot2's default lexical resources to use in addition to Solr's stop
|
||||
|
@ -79,31 +79,34 @@ public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFacto
|
|||
* Obtains stop words for a field from the associated
|
||||
* {@link StopFilterFactory}, if any.
|
||||
*/
|
||||
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
|
||||
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
|
||||
// No need to synchronize here, Carrot2 ensures that instances
|
||||
// of this class are not used by multiple threads at a time.
|
||||
if (!solrStopWords.containsKey(fieldName)) {
|
||||
final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName)
|
||||
.getIndexAnalyzer();
|
||||
if (fieldAnalyzer instanceof TokenizerChain) {
|
||||
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer)
|
||||
.getTokenFilterFactories();
|
||||
for (TokenFilterFactory factory : filterFactories) {
|
||||
if (factory instanceof StopFilterFactory) {
|
||||
// StopFilterFactory holds the stop words in a CharArraySet
|
||||
solrStopWords.put(fieldName,
|
||||
((StopFilterFactory) factory).getStopWords());
|
||||
}
|
||||
synchronized (solrStopWords) {
|
||||
if (!solrStopWords.containsKey(fieldName)) {
|
||||
solrStopWords.put(fieldName, new ArrayList<>());
|
||||
|
||||
if (factory instanceof CommonGramsFilterFactory) {
|
||||
solrStopWords.put(fieldName,
|
||||
((CommonGramsFilterFactory) factory)
|
||||
.getCommonWords());
|
||||
IndexSchema schema = core.getLatestSchema();
|
||||
final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
|
||||
if (fieldAnalyzer instanceof TokenizerChain) {
|
||||
final TokenFilterFactory[] filterFactories =
|
||||
((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
|
||||
for (TokenFilterFactory factory : filterFactories) {
|
||||
if (factory instanceof StopFilterFactory) {
|
||||
// StopFilterFactory holds the stop words in a CharArraySet
|
||||
CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
|
||||
solrStopWords.get(fieldName).add(stopWords);
|
||||
}
|
||||
|
||||
if (factory instanceof CommonGramsFilterFactory) {
|
||||
CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
|
||||
solrStopWords.get(fieldName).add(commonWords);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return solrStopWords.get(fieldName);
|
||||
}
|
||||
return solrStopWords.get(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -45,9 +48,6 @@ import org.carrot2.core.LanguageCode;
|
|||
import org.carrot2.util.attribute.AttributeUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -211,7 +211,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
// stoplabels.mt, so we're expecting only one cluster with label "online".
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine(engineName), 1, params);
|
||||
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
|
||||
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -226,7 +226,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
// only one cluster with label "online".
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine("lexical-resource-check"), 1, params);
|
||||
assertEquals(getLabels(clusters.get(0)), ImmutableList.of("online"));
|
||||
assertEquals(getLabels(clusters.get(0)), Collections.singletonList("online"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -243,9 +243,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
|
||||
final List<NamedList<Object>> clusters = checkEngine(
|
||||
getClusteringEngine("lexical-resource-check"), 2, params);
|
||||
assertEquals(ImmutableList.of("online"), getLabels(clusters.get(0)));
|
||||
assertEquals(ImmutableList.of("solrownstopword"),
|
||||
getLabels(clusters.get(1)));
|
||||
assertEquals(Collections.singletonList("online"), getLabels(clusters.get(0)));
|
||||
assertEquals(Collections.singletonList("solrownstopword"), getLabels(clusters.get(1)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -395,8 +394,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("stc", "default", "mock"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("stc", "default", "mock"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
LingoClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
@ -407,8 +406,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("unavailable", "lingo", "stc", "mock", "default"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("unavailable", "lingo", "stc", "mock", "default"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
LingoClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
@ -419,8 +418,8 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
|
||||
Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
|
||||
assertEquals(
|
||||
Lists.newArrayList("", "default"),
|
||||
Lists.newArrayList(engines.keySet()));
|
||||
Arrays.asList("", "default"),
|
||||
new ArrayList<>(engines.keySet()));
|
||||
assertEquals(
|
||||
MockClusteringAlgorithm.class,
|
||||
((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -29,8 +30,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs input documents as clusters.
|
||||
* Useful only in tests.
|
||||
|
@ -56,7 +55,7 @@ public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayListWithCapacity(documents.size());
|
||||
clusters = new ArrayList<>();
|
||||
|
||||
for (Document document : documents) {
|
||||
final Cluster cluster = new Cluster();
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -36,8 +37,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs stem of each token of each
|
||||
* document as a separate cluster. Useful only in tests.
|
||||
|
@ -64,7 +63,7 @@ public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
|
|||
final AllTokens allTokens = preprocessingContext.allTokens;
|
||||
final AllWords allWords = preprocessingContext.allWords;
|
||||
final AllStems allStems = preprocessingContext.allStems;
|
||||
clusters = Lists.newArrayListWithCapacity(allTokens.image.length);
|
||||
clusters = new ArrayList<>();
|
||||
for (int i = 0; i < allTokens.image.length; i++) {
|
||||
if (allTokens.wordIndex[i] >= 0) {
|
||||
clusters.add(new Cluster(new String(
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -33,7 +34,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs each token of each document
|
||||
|
@ -58,8 +58,7 @@ public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
|
|||
public void process() throws ProcessingException {
|
||||
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
|
||||
documents, "", LanguageCode.ENGLISH);
|
||||
clusters = Lists
|
||||
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
|
||||
clusters = new ArrayList<>();
|
||||
for (char[] token : preprocessingContext.allTokens.image) {
|
||||
if (token != null) {
|
||||
clusters.add(new Cluster(new String(token)));
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
|
@ -33,8 +34,6 @@ import org.carrot2.util.attribute.Bindable;
|
|||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock implementation of Carrot2 clustering algorithm for testing whether the
|
||||
* customized lexical resource lookup works correctly. This algorithm ignores
|
||||
|
@ -60,7 +59,7 @@ public class LexicalResourcesCheckClusteringAlgorithm extends
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayList();
|
||||
clusters = new ArrayList<>();
|
||||
if (wordsToCheck == null) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -15,13 +15,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.clustering.carrot2;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.carrot2.core.*;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
import org.carrot2.util.attribute.*;
|
||||
import org.carrot2.util.attribute.constraint.IntRange;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Bindable(prefix = "MockClusteringAlgorithm")
|
||||
|
@ -62,7 +62,7 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
clusters = Lists.newArrayList();
|
||||
clusters = new ArrayList<>();
|
||||
if (documents == null) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
9d8b42afe43ba5c0a0c5d67208d5c919e45c3584
|
|
@ -0,0 +1 @@
|
|||
5d76ec388711056bfaaacc354ed04ffa6811c7b7
|
Loading…
Reference in New Issue