mirror of https://github.com/apache/lucene.git
SOLR-5202: Support easier overrides of Carrot2 clustering attributes via XML data sets exported from the Workbench. Polished clustering configuration examples.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520677 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e4bcd35ac0
commit
43467403b2
|
@ -59,11 +59,13 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
|
||||
private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
|
||||
|
||||
/**
|
||||
* Base name for all spell checker query parameters. This name is also used to
|
||||
* Base name for all component parameters. This name is also used to
|
||||
* register this component with SearchHandler.
|
||||
*/
|
||||
public static final String COMPONENT_NAME = "clustering";
|
||||
|
||||
private NamedList initParams;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -30,7 +30,6 @@ public class ClusteringEngine {
|
|||
|
||||
public String init(NamedList config, SolrCore core) {
|
||||
name = (String) config.get(ENGINE_NAME);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.handler.clustering.ClusteringEngine;
|
||||
import org.apache.solr.handler.clustering.SearchClusteringEngine;
|
||||
import org.apache.solr.handler.component.HighlightComponent;
|
||||
import org.apache.solr.highlight.SolrHighlighter;
|
||||
|
@ -66,6 +67,8 @@ import org.carrot2.core.attribute.AttributeNames;
|
|||
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
|
||||
import org.carrot2.util.attribute.AttributeValueSet;
|
||||
import org.carrot2.util.attribute.AttributeValueSets;
|
||||
import org.carrot2.util.resource.ClassLoaderLocator;
|
||||
import org.carrot2.util.resource.IResource;
|
||||
import org.carrot2.util.resource.IResourceLocator;
|
||||
|
@ -73,11 +76,11 @@ import org.carrot2.util.resource.ResourceLookup;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.io.Closeables;
|
||||
import com.google.common.io.Closer;
|
||||
|
||||
/**
|
||||
* Search results clustering engine based on Carrot2 clustering algorithms.
|
||||
|
@ -122,8 +125,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
public SolrResourceLocator(SolrCore core, SolrParams initParams) {
|
||||
resourceLoader = core.getResourceLoader();
|
||||
carrot2ResourcesDir = initParams.get(
|
||||
CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
|
||||
String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
|
||||
carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static <T> T firstNonNull(T... args) {
|
||||
for (T t : args) {
|
||||
if (t != null) return t;
|
||||
}
|
||||
throw new NullPointerException("At least one element has to be non-null.");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -269,8 +283,52 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
String result = super.init(config, core);
|
||||
final SolrParams initParams = SolrParams.toSolrParams(config);
|
||||
|
||||
// Initialize Carrot2 controller. Pass initialization attributes, if any.
|
||||
// Initialization attributes for Carrot2 controller.
|
||||
HashMap<String, Object> initAttributes = new HashMap<String, Object>();
|
||||
|
||||
// Customize Carrot2's resource lookup to first look for resources
|
||||
// using Solr's resource loader. If that fails, try loading from the classpath.
|
||||
ResourceLookup resourceLookup = new ResourceLookup(
|
||||
// Solr-specific resource loading.
|
||||
new SolrResourceLocator(core, initParams),
|
||||
// Using the class loader directly because this time we want to omit the prefix
|
||||
new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
|
||||
|
||||
DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
|
||||
.resourceLookup(resourceLookup);
|
||||
|
||||
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
|
||||
// of this component. This by-name convention lookup is used to simplify configuring algorithms.
|
||||
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
|
||||
log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
|
||||
|
||||
if (!Strings.isNullOrEmpty(componentName)) {
|
||||
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
|
||||
if (attributeXmls.length > 0) {
|
||||
if (attributeXmls.length > 1) {
|
||||
log.warn("More than one attribute file found, first one will be used: "
|
||||
+ Arrays.toString(attributeXmls));
|
||||
}
|
||||
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
|
||||
|
||||
AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
|
||||
AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
|
||||
initAttributes.putAll(defaultSet.getAttributeValues());
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Could not read attributes XML for clustering component: "
|
||||
+ componentName, e);
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract solrconfig attributes, they take precedence.
|
||||
extractCarrotAttributes(initParams, initAttributes);
|
||||
|
||||
// Customize the stemmer and tokenizer factories. The implementations we provide here
|
||||
|
@ -291,15 +349,6 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
|
||||
initAttributes.put("solrCore", core);
|
||||
|
||||
// Customize Carrot2's resource lookup to first look for resources
|
||||
// using Solr's resource loader. If that fails, try loading from the classpath.
|
||||
DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
|
||||
new ResourceLookup(
|
||||
// Solr-specific resource loading.
|
||||
new SolrResourceLocator(core, initParams),
|
||||
// Using the class loader directly because this time we want to omit the prefix
|
||||
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
|
||||
|
||||
// Carrot2 uses current thread's context class loader to get
|
||||
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
|
||||
// To make sure classes from contrib JARs are available,
|
||||
|
@ -322,7 +371,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
|
||||
// Make sure the requested Carrot2 clustering algorithm class is available
|
||||
String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
|
||||
this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
|
||||
this.clusteringAlgorithmClass = core.getResourceLoader().findClass(
|
||||
carrotAlgorithmClassName, IClusteringAlgorithm.class);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -440,8 +491,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
docsHolder[0] = docIds.get(sdoc).intValue();
|
||||
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
|
||||
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
|
||||
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
|
||||
//should only be one document
|
||||
if (highlights != null && highlights.size() == 1) {
|
||||
// should only be one value given our setup
|
||||
// should only be one document
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
|
||||
|
||||
|
|
|
@ -43,9 +43,21 @@ public final class CarrotParams {
|
|||
|
||||
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
|
||||
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
|
||||
|
||||
/**
|
||||
* Use {@link #RESOURCES_DIR}.
|
||||
*/
|
||||
@Deprecated
|
||||
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
|
||||
/**
|
||||
* A replacement property pointing to Carrot<sup>2</sup> resources
|
||||
* (a more generic version of the deprecated {@link #LEXICAL_RESOURCES_DIR}).
|
||||
*/
|
||||
public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
|
||||
|
||||
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM,
|
||||
|
||||
|
@ -62,6 +74,7 @@ public final class CarrotParams {
|
|||
NUM_DESCRIPTIONS,
|
||||
OUTPUT_SUB_CLUSTERS,
|
||||
LEXICAL_RESOURCES_DIR,
|
||||
RESOURCES_DIR,
|
||||
LANGUAGE_CODE_MAP);
|
||||
|
||||
/** No instances. */
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
<attribute-sets default="overridden-attributes">
|
||||
<attribute-set id="overridden-attributes">
|
||||
<value-set>
|
||||
<label>defaults</label>
|
||||
<attribute key="MockClusteringAlgorithm.depth"><value value="1" /></attribute>
|
||||
<attribute key="MockClusteringAlgorithm.labels"><value value="3" /></attribute>
|
||||
<attribute key="MockClusteringAlgorithm.maxClusters"><value value="13" /></attribute>
|
||||
</value-set>
|
||||
</attribute-set>
|
||||
</attribute-sets>
|
|
@ -327,6 +327,12 @@
|
|||
<str name="name">mock</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">mock-external-attrs</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
|
||||
<!-- takes precedence over external XML -->
|
||||
<int name="MockClusteringAlgorithm.labels">4</int>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">echo</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str>
|
||||
|
@ -338,6 +344,11 @@
|
|||
<lst name="engine">
|
||||
<str name="name">lexical-resource-check-custom-resource-dir</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
|
||||
<str name="carrot.resourcesDir">clustering/custom</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">lexical-resource-check-custom-resource-dir-deprecated</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
|
||||
<str name="carrot.lexicalResourcesDir">clustering/custom</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
|
|
|
@ -122,7 +122,14 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
@Test
|
||||
public void testWithoutSubclusters() throws Exception {
|
||||
checkClusters(checkEngine(getClusteringEngine("mock"), AbstractClusteringTestCase.numberOfDocs),
|
||||
1, 1, 0);
|
||||
1, 1, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExternalXmlAttributesFile() throws Exception {
|
||||
checkClusters(
|
||||
checkEngine(getClusteringEngine("mock-external-attrs"), 13),
|
||||
1, 4, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -189,6 +196,12 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
"online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLexicalResourcesFromSolrConfigCustomDirDeprecated() throws Exception {
|
||||
checkLexicalResourcesFromSolrConfig("lexical-resource-check-custom-resource-dir-deprecated",
|
||||
"online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
|
||||
}
|
||||
|
||||
private void checkLexicalResourcesFromSolrConfig(String engineName, String wordsToCheck)
|
||||
throws IOException {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
|
|
@ -49,6 +49,12 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
@IntRange(min = 1, max = 5)
|
||||
private int labels = 1;
|
||||
|
||||
@Input
|
||||
@Processing
|
||||
@Attribute
|
||||
@IntRange(min = 0)
|
||||
private int maxClusters = 0;
|
||||
|
||||
@Input
|
||||
@Processing
|
||||
@Attribute
|
||||
|
@ -61,6 +67,10 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
|
|||
return;
|
||||
}
|
||||
|
||||
if (maxClusters > 0) {
|
||||
documents = documents.subList(0, maxClusters);
|
||||
}
|
||||
|
||||
int documentIndex = 1;
|
||||
for (Document document : documents) {
|
||||
StringBuilder label = new StringBuilder("Cluster " + documentIndex);
|
||||
|
|
|
@ -1369,113 +1369,7 @@
|
|||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Clustering Component
|
||||
|
||||
http://wiki.apache.org/solr/ClusteringComponent
|
||||
|
||||
You'll need to set the solr.clustering.enabled system property
|
||||
when running solr to run with clustering enabled:
|
||||
|
||||
java -Dsolr.clustering.enabled=true -jar start.jar
|
||||
|
||||
-->
|
||||
<searchComponent name="clustering"
|
||||
enable="${solr.clustering.enabled:false}"
|
||||
class="solr.clustering.ClusteringComponent" >
|
||||
<!-- Declare an engine -->
|
||||
<lst name="engine">
|
||||
<!-- The name, only one can be named "default" -->
|
||||
<str name="name">default</str>
|
||||
|
||||
<!-- Class name of Carrot2 clustering algorithm.
|
||||
|
||||
Currently available algorithms are:
|
||||
|
||||
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
|
||||
* org.carrot2.clustering.stc.STCClusteringAlgorithm
|
||||
* org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
|
||||
|
||||
See http://project.carrot2.org/algorithms.html for the
|
||||
algorithm's characteristics.
|
||||
-->
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
|
||||
|
||||
<!-- Overriding values for Carrot2 default algorithm attributes.
|
||||
|
||||
For a description of all available attributes, see:
|
||||
http://download.carrot2.org/stable/manual/#chapter.components.
|
||||
Use attribute key as name attribute of str elements
|
||||
below. These can be further overridden for individual
|
||||
requests by specifying attribute key as request parameter
|
||||
name and attribute value as parameter value.
|
||||
-->
|
||||
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
|
||||
|
||||
<!-- Location of Carrot2 lexical resources.
|
||||
|
||||
A directory from which to load Carrot2-specific stop words
|
||||
and stop labels. Absolute or relative to Solr config directory.
|
||||
If a specific resource (e.g. stopwords.en) is present in the
|
||||
specified dir, it will completely override the corresponding
|
||||
default one that ships with Carrot2.
|
||||
|
||||
For an overview of Carrot2 lexical resources, see:
|
||||
http://download.carrot2.org/head/manual/#chapter.lexical-resources
|
||||
-->
|
||||
<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
|
||||
|
||||
<!-- The language to assume for the documents.
|
||||
|
||||
For a list of allowed values, see:
|
||||
http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
|
||||
-->
|
||||
<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">stc</str>
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the clustering component
|
||||
|
||||
This is purely as an example.
|
||||
|
||||
In reality you will likely want to add the component to your
|
||||
already specified request handlers.
|
||||
-->
|
||||
<requestHandler name="/clustering"
|
||||
startup="lazy"
|
||||
enable="${solr.clustering.enabled:false}"
|
||||
class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="clustering">true</bool>
|
||||
<str name="clustering.engine">default</str>
|
||||
<bool name="clustering.results">true</bool>
|
||||
<!-- The title field -->
|
||||
<str name="carrot.title">name</str>
|
||||
<str name="carrot.url">id</str>
|
||||
<!-- The field to cluster on -->
|
||||
<str name="carrot.snippet">features</str>
|
||||
<!-- produce summaries -->
|
||||
<bool name="carrot.produceSummary">true</bool>
|
||||
<!-- the maximum number of labels per cluster -->
|
||||
<!--<int name="carrot.numDescriptions">5</int>-->
|
||||
<!-- produce sub clusters -->
|
||||
<bool name="carrot.outputSubClusters">false</bool>
|
||||
|
||||
<str name="defType">edismax</str>
|
||||
<str name="qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||
</str>
|
||||
<str name="q.alt">*:*</str>
|
||||
<str name="rows">10</str>
|
||||
<str name="fl">*,score</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>clustering</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
<!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) -->
|
||||
|
||||
<!-- Terms Component
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
<!--
|
||||
Default configuration for the Lingo clustering algorithm.
|
||||
|
||||
This file can be loaded (and saved) by Carrot2 Workbench.
|
||||
http://project.carrot2.org/download.html
|
||||
-->
|
||||
<attribute-sets default="attributes">
|
||||
<attribute-set id="attributes">
|
||||
<value-set>
|
||||
<label>attributes</label>
|
||||
<!--
|
||||
The language to assume for clustered documents.
|
||||
For a list of allowed values, see:
|
||||
http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
|
||||
-->
|
||||
<attribute key="MultilingualClustering.defaultLanguage">
|
||||
<value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
|
||||
</attribute>
|
||||
<attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
|
||||
<value type="java.lang.Integer" value="20"/>
|
||||
</attribute>
|
||||
</value-set>
|
||||
</attribute-set>
|
||||
</attribute-sets>
|
|
@ -0,0 +1,19 @@
|
|||
<!--
|
||||
Default configuration for the bisecting k-means clustering algorithm.
|
||||
|
||||
This file can be loaded (and saved) by Carrot2 Workbench.
|
||||
http://project.carrot2.org/download.html
|
||||
-->
|
||||
<attribute-sets default="attributes">
|
||||
<attribute-set id="attributes">
|
||||
<value-set>
|
||||
<label>attributes</label>
|
||||
<attribute key="MultilingualClustering.defaultLanguage">
|
||||
<value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
|
||||
</attribute>
|
||||
<attribute key="MultilingualClustering.languageAggregationStrategy">
|
||||
<value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
|
||||
</attribute>
|
||||
</value-set>
|
||||
</attribute-set>
|
||||
</attribute-sets>
|
|
@ -0,0 +1,19 @@
|
|||
<!--
|
||||
Default configuration for the STC clustering algorithm.
|
||||
|
||||
This file can be loaded (and saved) by Carrot2 Workbench.
|
||||
http://project.carrot2.org/download.html
|
||||
-->
|
||||
<attribute-sets default="attributes">
|
||||
<attribute-set id="attributes">
|
||||
<value-set>
|
||||
<label>attributes</label>
|
||||
<attribute key="MultilingualClustering.defaultLanguage">
|
||||
<value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
|
||||
</attribute>
|
||||
<attribute key="MultilingualClustering.languageAggregationStrategy">
|
||||
<value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
|
||||
</attribute>
|
||||
</value-set>
|
||||
</attribute-set>
|
||||
</attribute-sets>
|
|
@ -1382,59 +1382,57 @@
|
|||
<searchComponent name="clustering"
|
||||
enable="${solr.clustering.enabled:true}"
|
||||
class="solr.clustering.ClusteringComponent" >
|
||||
<!-- Declare an engine -->
|
||||
<!-- Declare a named clustering engine. Only one engine can be named
|
||||
"default" (and it becomes the default one for the search component).
|
||||
-->
|
||||
<lst name="engine">
|
||||
<!-- The name, only one can be named "default" -->
|
||||
<str name="name">default</str>
|
||||
|
||||
<!-- Class name of Carrot2 clustering algorithm.
|
||||
<!-- Class name of a clustering algorithm compatible with the Carrot2
|
||||
framework.
|
||||
|
||||
Currently available algorithms are:
|
||||
|
||||
Currently available open source algorithms are:
|
||||
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
|
||||
* org.carrot2.clustering.stc.STCClusteringAlgorithm
|
||||
* org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
|
||||
|
||||
See http://project.carrot2.org/algorithms.html for the
|
||||
algorithm's characteristics.
|
||||
|
||||
See http://project.carrot2.org/algorithms.html for more information.
|
||||
|
||||
A commercial algorithm Lingo3G (needs to be installed separately) is defined as:
|
||||
* com.carrotsearch.lingo3g.Lingo3GClusteringAlgorithm
|
||||
-->
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
|
||||
|
||||
<!-- Overriding values for Carrot2 default algorithm attributes.
|
||||
<!-- Override location of the clustering algorithm's resources
|
||||
(attribute definitions and lexical resources).
|
||||
|
||||
For a description of all available attributes, see:
|
||||
http://download.carrot2.org/stable/manual/#chapter.components.
|
||||
Use attribute key as name attribute of str elements
|
||||
below. These can be further overridden for individual
|
||||
requests by specifying attribute key as request parameter
|
||||
name and attribute value as parameter value.
|
||||
-->
|
||||
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
|
||||
|
||||
<!-- Location of Carrot2 lexical resources.
|
||||
|
||||
A directory from which to load Carrot2-specific stop words
|
||||
and stop labels. Absolute or relative to Solr config directory.
|
||||
A directory from which to load algorithm-specific stop words,
|
||||
stop labels and attribute definition XMLs.
|
||||
Absolute or relative to Solr config directory.
|
||||
If a specific resource (e.g. stopwords.en) is present in the
|
||||
specified dir, it will completely override the corresponding
|
||||
default one that ships with Carrot2.
|
||||
default one that typically ships with each algorithm.
|
||||
|
||||
For an overview of Carrot2 lexical resources, see:
|
||||
http://download.carrot2.org/head/manual/#chapter.lexical-resources
|
||||
-->
|
||||
<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
|
||||
|
||||
<!-- The language to assume for the documents.
|
||||
|
||||
For a list of allowed values, see:
|
||||
http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
|
||||
|
||||
For an overview of Lingo3G lexical resources, see:
|
||||
http://download.carrotsearch.com/lingo3g/manual/#chapter.lexical-resources
|
||||
-->
|
||||
<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
|
||||
<!-- <str name="carrot.resourcesDir">clustering/carrot2</str> -->
|
||||
</lst>
|
||||
|
||||
<!-- An example definition for the STC clustering algorithm. -->
|
||||
<lst name="engine">
|
||||
<str name="name">stc</str>
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
|
||||
</lst>
|
||||
|
||||
<!-- An example definition for the bisecting kmeans clustering algorithm. -->
|
||||
<lst name="engine">
|
||||
<str name="name">kmeans</str>
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the clustering component
|
||||
|
|
Loading…
Reference in New Issue