SOLR-5202: Support easier overrides of Carrot2 clustering attributes via XML data sets exported from the Workbench. Polished clustering configuration examples.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520677 13f79535-47bb-0310-9956-ffa450edef68
2013-09-06 20:02:57 +00:00 · 2013-09-06 20:02:57 +00:00 · 43467403b2
parent e4bcd35ac0
commit 43467403b2
13 changed files with 223 additions and 159 deletions
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
@ -59,11 +59,13 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar

  private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
  private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
+
  /**
-   * Base name for all spell checker query parameters. This name is also used to
+   * Base name for all component parameters. This name is also used to
   * register this component with SearchHandler.
   */
  public static final String COMPONENT_NAME = "clustering";
+  
  private NamedList initParams;

  @Override
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
@ -30,7 +30,6 @@ public class ClusteringEngine {

  public String init(NamedList config, SolrCore core) {
    name = (String) config.get(ENGINE_NAME);
-
    return name;
  }

--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
@ -46,6 +46,7 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.handler.clustering.ClusteringEngine;
 import org.apache.solr.handler.clustering.SearchClusteringEngine;
 import org.apache.solr.handler.component.HighlightComponent;
 import org.apache.solr.highlight.SolrHighlighter;
@ -66,6 +67,8 @@ import org.carrot2.core.attribute.AttributeNames;
 import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
 import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
 import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
+import org.carrot2.util.attribute.AttributeValueSet;
+import org.carrot2.util.attribute.AttributeValueSets;
 import org.carrot2.util.resource.ClassLoaderLocator;
 import org.carrot2.util.resource.IResource;
 import org.carrot2.util.resource.IResourceLocator;
@ -73,11 +76,11 @@ import org.carrot2.util.resource.ResourceLookup;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import com.google.common.base.Objects;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-import com.google.common.io.Closer;

 /**
 * Search results clustering engine based on Carrot2 clustering algorithms.
@ -122,8 +125,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {

    public SolrResourceLocator(SolrCore core, SolrParams initParams) {
      resourceLoader = core.getResourceLoader();
-      carrot2ResourcesDir = initParams.get(
-          CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
+      
+      @SuppressWarnings("deprecation")
+      String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
+      String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
+      carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX);
+    }
+
+    @SuppressWarnings("unchecked")
+    public static <T> T firstNonNull(T... args) {
+      for (T t : args) {
+        if (t != null) return t;
+      }
+      throw new NullPointerException("At least one element has to be non-null.");
    }

    @Override
@ -269,8 +283,52 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
    String result = super.init(config, core);
    final SolrParams initParams = SolrParams.toSolrParams(config);

-    // Initialize Carrot2 controller. Pass initialization attributes, if any.
+    // Initialization attributes for Carrot2 controller.
    HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+
+    // Customize Carrot2's resource lookup to first look for resources
+    // using Solr's resource loader. If that fails, try loading from the classpath.
+    ResourceLookup resourceLookup = new ResourceLookup(
+      // Solr-specific resource loading.
+      new SolrResourceLocator(core, initParams),
+      // Using the class loader directly because this time we want to omit the prefix
+      new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
+
+    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
+      .resourceLookup(resourceLookup);
+
+    // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
+    // of this component. This by-name convention lookup is used to simplify configuring algorithms.
+    String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
+    log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
+
+    if (!Strings.isNullOrEmpty(componentName)) {
+      IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
+      if (attributeXmls.length > 0) {
+        if (attributeXmls.length > 1) {
+          log.warn("More than one attribute file found, first one will be used: " 
+              + Arrays.toString(attributeXmls));
+        }
+
+        Thread ct = Thread.currentThread();
+        ClassLoader prev = ct.getContextClassLoader();
+        try {
+          ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+
+          AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
+          AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
+          initAttributes.putAll(defaultSet.getAttributeValues());
+        } catch (Exception e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, 
+              "Could not read attributes XML for clustering component: " 
+                  + componentName, e);
+        } finally {
+          ct.setContextClassLoader(prev);
+        }
+      }
+    }
+
+    // Extract solrconfig attributes, they take precedence.
    extractCarrotAttributes(initParams, initAttributes);

    // Customize the stemmer and tokenizer factories. The implementations we provide here
@ -291,15 +349,6 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
    // Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
    initAttributes.put("solrCore", core);

-    // Customize Carrot2's resource lookup to first look for resources
-    // using Solr's resource loader. If that fails, try loading from the classpath.
-    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
-      new ResourceLookup(
-        // Solr-specific resource loading.
-        new SolrResourceLocator(core, initParams),
-        // Using the class loader directly because this time we want to omit the prefix
-        new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
-
    // Carrot2 uses current thread's context class loader to get
    // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
    // To make sure classes from contrib JARs are available,
@ -322,7 +371,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {

    // Make sure the requested Carrot2 clustering algorithm class is available
    String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
-    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
+    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(
+        carrotAlgorithmClassName, IClusteringAlgorithm.class);
+
    return result;
  }

@ -440,8 +491,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
        docsHolder[0] = docIds.get(sdoc).intValue();
        DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
        NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
-        if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
-          //should only be one document
+        if (highlights != null && highlights.size() == 1) {
+          // should only be one value given our setup
+          // should only be one document
          @SuppressWarnings("unchecked")
          NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
          
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
@ -43,9 +43,21 @@ public final class CarrotParams {

  public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
  public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
-  public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+
  public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";

+  /**
+   * Use {@link #RESOURCES_DIR}.
+   */
+  @Deprecated
+  public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+  
+  /**
+   * A replacement property pointing to Carrot<sup>2</sup> resources
+   * (a more generic version of the deprecated {@link #LEXICAL_RESOURCES_DIR}).
+   */
+  public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
+
  static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
          ALGORITHM, 
          
@ -62,6 +74,7 @@ public final class CarrotParams {
          NUM_DESCRIPTIONS, 
          OUTPUT_SUB_CLUSTERS, 
          LEXICAL_RESOURCES_DIR,
+          RESOURCES_DIR,
          LANGUAGE_CODE_MAP);
  
  /** No instances. */
--- a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml
+++ b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml
@ -0,0 +1,10 @@
+<attribute-sets default="overridden-attributes">
+  <attribute-set id="overridden-attributes">
+    <value-set>
+      <label>defaults</label>
+      <attribute key="MockClusteringAlgorithm.depth"><value value="1" /></attribute>
+      <attribute key="MockClusteringAlgorithm.labels"><value value="3" /></attribute>
+      <attribute key="MockClusteringAlgorithm.maxClusters"><value value="13" /></attribute>
+    </value-set>
+  </attribute-set>
+</attribute-sets>
--- a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
+++ b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
@ -327,6 +327,12 @@
      <str name="name">mock</str>
      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
    </lst>
+    <lst name="engine">
+      <str name="name">mock-external-attrs</str>
+      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+      <!-- takes precedence over external XML -->
+      <int name="MockClusteringAlgorithm.labels">4</int>
+    </lst>
    <lst name="engine">
      <str name="name">echo</str>
      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str>
@ -338,6 +344,11 @@
    <lst name="engine">
      <str name="name">lexical-resource-check-custom-resource-dir</str>
      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
+      <str name="carrot.resourcesDir">clustering/custom</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">lexical-resource-check-custom-resource-dir-deprecated</str>
+      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
      <str name="carrot.lexicalResourcesDir">clustering/custom</str>
    </lst>
    <lst name="engine">
--- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
+++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
@ -122,7 +122,14 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
  @Test
  public void testWithoutSubclusters() throws Exception {
    checkClusters(checkEngine(getClusteringEngine("mock"), AbstractClusteringTestCase.numberOfDocs),
-            1, 1, 0);
+        1, 1, 0);
+  }
+
+  @Test
+  public void testExternalXmlAttributesFile() throws Exception {
+    checkClusters(
+        checkEngine(getClusteringEngine("mock-external-attrs"), 13),
+        1, 4, 0);
  }

  @Test
@ -189,6 +196,12 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
        "online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
  }

+  @Test
+  public void testLexicalResourcesFromSolrConfigCustomDirDeprecated() throws Exception {
+    checkLexicalResourcesFromSolrConfig("lexical-resource-check-custom-resource-dir-deprecated",
+        "online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
+  }
+
  private void checkLexicalResourcesFromSolrConfig(String engineName, String wordsToCheck)
      throws IOException {
    ModifiableSolrParams params = new ModifiableSolrParams();
--- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
+++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
@ -49,6 +49,12 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
  @IntRange(min = 1, max = 5)
  private int labels = 1;

+  @Input
+  @Processing
+  @Attribute
+  @IntRange(min = 0)
+  private int maxClusters = 0;
+
  @Input
  @Processing
  @Attribute
@ -61,6 +67,10 @@ public class MockClusteringAlgorithm extends ProcessingComponentBase implements
      return;
    }

+    if (maxClusters > 0) {
+      documents = documents.subList(0, maxClusters);
+    }
+
    int documentIndex = 1;
    for (Document document : documents) {
      StringBuilder label = new StringBuilder("Cluster " + documentIndex);
--- a/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
+++ b/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
@ -1369,113 +1369,7 @@
    </arr>
  </requestHandler>

-  <!-- Clustering Component
-
-       http://wiki.apache.org/solr/ClusteringComponent
-
-       You'll need to set the solr.clustering.enabled system property
-       when running solr to run with clustering enabled:
-
-            java -Dsolr.clustering.enabled=true -jar start.jar
-
-    -->
-  <searchComponent name="clustering"
-                   enable="${solr.clustering.enabled:false}"
-                   class="solr.clustering.ClusteringComponent" >
-    <!-- Declare an engine -->
-    <lst name="engine">
-      <!-- The name, only one can be named "default" -->
-      <str name="name">default</str>
-
-      <!-- Class name of Carrot2 clustering algorithm.
-
-           Currently available algorithms are:
-           
-           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-           * org.carrot2.clustering.stc.STCClusteringAlgorithm
-           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-           
-           See http://project.carrot2.org/algorithms.html for the
-           algorithm's characteristics.
-        -->
-      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
-      <!-- Overriding values for Carrot2 default algorithm attributes.
-
-           For a description of all available attributes, see:
-           http://download.carrot2.org/stable/manual/#chapter.components.
-           Use attribute key as name attribute of str elements
-           below. These can be further overridden for individual
-           requests by specifying attribute key as request parameter
-           name and attribute value as parameter value.
-        -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-
-      <!-- Location of Carrot2 lexical resources.
-
-           A directory from which to load Carrot2-specific stop words
-           and stop labels. Absolute or relative to Solr config directory.
-           If a specific resource (e.g. stopwords.en) is present in the
-           specified dir, it will completely override the corresponding
-           default one that ships with Carrot2.
-
-           For an overview of Carrot2 lexical resources, see:
-           http://download.carrot2.org/head/manual/#chapter.lexical-resources
-        -->
-      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
-      <!-- The language to assume for the documents.
-
-           For a list of allowed values, see:
-           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
-       -->
-      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
-    </lst>
-    <lst name="engine">
-      <str name="name">stc</str>
-      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
-    </lst>
-  </searchComponent>
-
-  <!-- A request handler for demonstrating the clustering component
-
-       This is purely as an example.
-
-       In reality you will likely want to add the component to your 
-       already specified request handlers. 
-    -->
-  <requestHandler name="/clustering"
-                  startup="lazy"
-                  enable="${solr.clustering.enabled:false}"
-                  class="solr.SearchHandler">
-    <lst name="defaults">
-      <bool name="clustering">true</bool>
-      <str name="clustering.engine">default</str>
-      <bool name="clustering.results">true</bool>
-      <!-- The title field -->
-      <str name="carrot.title">name</str>
-      <str name="carrot.url">id</str>
-      <!-- The field to cluster on -->
-      <str name="carrot.snippet">features</str>
-      <!-- produce summaries -->
-      <bool name="carrot.produceSummary">true</bool>
-      <!-- the maximum number of labels per cluster -->
-      <!--<int name="carrot.numDescriptions">5</int>-->
-      <!-- produce sub clusters -->
-      <bool name="carrot.outputSubClusters">false</bool>
-
-      <str name="defType">edismax</str>
-      <str name="qf">
-        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-      </str>
-      <str name="q.alt">*:*</str>
-      <str name="rows">10</str>
-      <str name="fl">*,score</str>
-    </lst>
-    <arr name="last-components">
-      <str>clustering</str>
-    </arr>
-  </requestHandler>
+  <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) -->

  <!-- Terms Component

--- a/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml
+++ b/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml
@ -0,0 +1,24 @@
+<!-- 
+  Default configuration for the Lingo clustering algorithm.
+
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <!-- 
+          The language to assume for clustered documents.
+          For a list of allowed values, see: 
+          http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+          -->
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
+            <value type="java.lang.Integer" value="20"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>
--- a/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml
+++ b/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml
@ -0,0 +1,19 @@
+<!-- 
+  Default configuration for the bisecting k-means clustering algorithm.
+  
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="MultilingualClustering.languageAggregationStrategy">
+            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>
--- a/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml
+++ b/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml
@ -0,0 +1,19 @@
+<!-- 
+  Default configuration for the STC clustering algorithm.
+
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="MultilingualClustering.languageAggregationStrategy">
+            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>
--- a/solr/example/solr/collection1/conf/solrconfig.xml
+++ b/solr/example/solr/collection1/conf/solrconfig.xml
@ -1382,59 +1382,57 @@
  <searchComponent name="clustering"
                   enable="${solr.clustering.enabled:true}"
                   class="solr.clustering.ClusteringComponent" >
-    <!-- Declare an engine -->
+    <!-- Declare a named clustering engine. Only one engine can be named 
+         "default" (and it becomes the default one for the search component). 
+      -->
    <lst name="engine">
-      <!-- The name, only one can be named "default" -->
      <str name="name">default</str>

-      <!-- Class name of Carrot2 clustering algorithm.
+      <!-- Class name of a clustering algorithm compatible with the Carrot2
+           framework.

-           Currently available algorithms are:
-           
+           Currently available open source algorithms are:
           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
           * org.carrot2.clustering.stc.STCClusteringAlgorithm
           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-           
-           See http://project.carrot2.org/algorithms.html for the
-           algorithm's characteristics.
+
+           See http://project.carrot2.org/algorithms.html for more information.
+
+           A commercial algorithm Lingo3G (needs to be installed separately) is defined as:
+           * com.carrotsearch.lingo3g.Lingo3GClusteringAlgorithm
        -->
      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

-      <!-- Overriding values for Carrot2 default algorithm attributes.
+      <!-- Override location of the clustering algorithm's resources 
+           (attribute definitions and lexical resources).

-           For a description of all available attributes, see:
-           http://download.carrot2.org/stable/manual/#chapter.components.
-           Use attribute key as name attribute of str elements
-           below. These can be further overridden for individual
-           requests by specifying attribute key as request parameter
-           name and attribute value as parameter value.
-        -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-
-      <!-- Location of Carrot2 lexical resources.
-
-           A directory from which to load Carrot2-specific stop words
-           and stop labels. Absolute or relative to Solr config directory.
+           A directory from which to load algorithm-specific stop words,
+           stop labels and attribute definition XMLs. 
+           Absolute or relative to Solr config directory.
           If a specific resource (e.g. stopwords.en) is present in the
           specified dir, it will completely override the corresponding
-           default one that ships with Carrot2.
+           default one that typically ships with each algorithm.

           For an overview of Carrot2 lexical resources, see:
           http://download.carrot2.org/head/manual/#chapter.lexical-resources
-        -->
-      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
-      <!-- The language to assume for the documents.
-
-           For a list of allowed values, see:
-           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+           
+           For an overview of Lingo3G lexical resources, see:
+           http://download.carrotsearch.com/lingo3g/manual/#chapter.lexical-resources
       -->
-      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+      <!-- <str name="carrot.resourcesDir">clustering/carrot2</str> -->
    </lst>
+
+    <!-- An example definition for the STC clustering algorithm. -->
    <lst name="engine">
      <str name="name">stc</str>
      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
+
+    <!-- An example definition for the bisecting kmeans clustering algorithm. -->
+    <lst name="engine">
+      <str name="name">kmeans</str>
+      <str name="carrot.algorithm">org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm</str>
+    </lst>
  </searchComponent>

  <!-- A request handler for demonstrating the clustering component