diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
index 90437e65330..9d045436bfd 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -18,7 +19,6 @@ package org.apache.solr.handler.clustering;
import java.io.IOException;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -38,6 +38,7 @@ import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -46,13 +47,13 @@ import com.google.common.collect.Maps;
/**
- * Provide a plugin for performing cluster analysis. This can either be applied to
+ * Provides a plugin for performing cluster analysis. This can either be applied to
* search results (e.g., via Carrot2 ) or for
* clustering documents (e.g., via Mahout ).
*
- * This engine is experimental. Output from this engine is subject to change in future releases.
- *
- * See Solr example for configuration examples.
+ * See Solr example for configuration examples.
+ *
+ * @lucene.experimental
*/
public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);
@@ -78,6 +79,13 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
*/
private final Map searchClusteringEnginesView = Collections.unmodifiableMap(searchClusteringEngines);
+ /**
+ * Initialization parameters are temporarily saved here; the component
+ * is initialized in {@link #inform(SolrCore)} because we need to know
+ * the core's {@link SolrResourceLoader}.
+ *
+ * @see #init(NamedList)
+ */
private NamedList initParams;
@Override
@@ -150,8 +158,9 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
SearchClusteringEngine engine = getSearchClusteringEngine(rb);
if (engine != null) {
DocListAndSet results = rb.getResults();
- Map docIds = new HashMap(results.docList.size());
- SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
+ Map docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
+ SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
+ results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
rb.rsp.add("clusters", clusters);
} else {
@@ -177,7 +186,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar
}
}
}
-
+
private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
return searchClusteringEngines.get(getClusteringEngineName(rb));
}
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
index e76323faee4..e8e41af06d4 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
@@ -18,16 +18,16 @@ package org.apache.solr.handler.clustering;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
-
/**
- *
- *
- **/
+ * A base class for {@link SearchClusteringEngine} and {@link DocumentClusteringEngine}.
+ * @lucene.experimental
+ */
public class ClusteringEngine {
- private String name;
public static final String ENGINE_NAME = "name";
public static final String DEFAULT_ENGINE_NAME = "default";
+ private String name;
+
public String init(NamedList<?> config, SolrCore core) {
name = (String) config.get(ENGINE_NAME);
return name;
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java
index 19ba8ba919b..fc1c3c67d4d 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java
@@ -16,11 +16,9 @@ package org.apache.solr.handler.clustering;
* limitations under the License.
*/
-
/**
- *
- *
- **/
+ * @lucene.experimental
+ */
public interface ClusteringParams {
public static final String CLUSTERING_PREFIX = "clustering.";
@@ -30,8 +28,9 @@ public interface ClusteringParams {
public static final String USE_SEARCH_RESULTS = CLUSTERING_PREFIX + "results";
public static final String USE_COLLECTION = CLUSTERING_PREFIX + "collection";
+
/**
- * When document clustering, cluster on the Doc Set
+ * When clustering full documents, cluster on the Doc Set.
*/
public static final String USE_DOC_SET = CLUSTERING_PREFIX + "docs.useDocSet";
}
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
index 792e2b7c81a..d56ab7b48df 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
@@ -20,11 +20,9 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.search.DocSet;
-
/**
- * Experimental. Subject to change before the next release.
- *
- **/
+ * @lucene.experimental
+ */
public abstract class DocumentClusteringEngine extends ClusteringEngine {
/**
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
index 38193a915b1..3fcae7ba2e4 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,7 +17,6 @@ package org.apache.solr.handler.clustering;
* limitations under the License.
*/
-import java.io.IOException;
import java.util.Map;
import java.util.Set;
@@ -24,20 +24,19 @@ import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.DocList;
-import org.apache.solr.util.SolrPluginUtils;
-
+import org.apache.solr.response.SolrQueryResponse;
/**
- *
- *
- **/
+ * Base class for clustering engines performing cluster analysis on search
+ * results.
+ *
+ * @lucene.experimental
+ */
public abstract class SearchClusteringEngine extends ClusteringEngine {
-
- @Deprecated
- public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);
-
- // TODO: need DocList, too?
+ /**
+ * Performs the clustering and returns a clusters structure to be appended
+ * to {@link SolrQueryResponse}.
+ */
public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
Map docIds, SolrQueryRequest sreq);
@@ -45,15 +44,10 @@ public abstract class SearchClusteringEngine extends ClusteringEngine {
* Returns the set of field names to load.
* Concrete classes can override this method if needed.
* Default implementation returns null, that is, all stored fields are loaded.
- * @return set of field names to load
+ *
+ * @return The set of field names to load.
*/
protected Set getFieldsToLoad(SolrQueryRequest sreq){
return null;
}
-
- public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
- Map docIds) throws IOException{
- return SolrPluginUtils.docListToSolrDocumentList(
- docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
- }
}
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
index 26e839ee7db..6147fb4956f 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
@@ -17,10 +17,7 @@ package org.apache.solr.handler.clustering.carrot2;
* limitations under the License.
*/
-import java.io.ByteArrayInputStream;
-import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -32,7 +29,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.Query;
@@ -45,7 +41,6 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.clustering.ClusteringEngine;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.handler.component.HighlightComponent;
@@ -56,7 +51,6 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.SolrPluginUtils;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
@@ -71,7 +65,6 @@ import org.carrot2.util.attribute.AttributeValueSet;
import org.carrot2.util.attribute.AttributeValueSets;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.IResource;
-import org.carrot2.util.resource.IResourceLocator;
import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -84,19 +77,17 @@ import com.google.common.collect.Sets;
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
- *
- * Output from this class is subject to change.
*
* @see "http://project.carrot2.org"
+ * @lucene.experimental
*/
public class CarrotClusteringEngine extends SearchClusteringEngine {
- private transient static Logger log = LoggerFactory
- .getLogger(CarrotClusteringEngine.class);
+ transient static Logger log = LoggerFactory.getLogger(CarrotClusteringEngine.class);
/**
* The subdirectory in Solr config dir to read customized Carrot2 resources from.
*/
- private static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";
+ static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";
/**
* Name of Carrot2 document's field containing Solr document's identifier.
@@ -114,167 +105,15 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
* Carrot2 controller that manages instances of clustering algorithms
*/
private Controller controller = ControllerFactory.createPooling();
+
+ /**
+ * {@link IClusteringAlgorithm} class used for actual clustering.
+ */
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
/** Solr core we're bound to. */
private SolrCore core;
- private static class SolrResourceLocator implements IResourceLocator {
- private final SolrResourceLoader resourceLoader;
- private final String carrot2ResourcesDir;
-
- public SolrResourceLocator(SolrCore core, SolrParams initParams) {
- resourceLoader = core.getResourceLoader();
-
- @SuppressWarnings("deprecation")
- String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
- String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
- carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX);
- }
-
- @SuppressWarnings("unchecked")
- public static <T> T firstNonNull(T... args) {
- for (T t : args) {
- if (t != null) return t;
- }
- throw new NullPointerException("At least one element has to be non-null.");
- }
-
- @Override
- public IResource[] getAll(final String resource) {
- final String resourceName = carrot2ResourcesDir + "/" + resource;
- log.debug("Looking for Solr resource: " + resourceName);
-
- InputStream resourceStream = null;
- final byte [] asBytes;
- try {
- resourceStream = resourceLoader.openResource(resourceName);
- asBytes = IOUtils.toByteArray(resourceStream);
- } catch (IOException e) {
- log.debug("Resource not found in Solr's config: " + resourceName
- + ". Using the default " + resource + " from Carrot JAR.");
- return new IResource[] {};
- } finally {
- if (resourceStream != null) {
- try {
- resourceStream.close();
- } catch (IOException e) {
- // ignore.
- }
- }
- }
-
- log.info("Loaded Solr resource: " + resourceName);
-
- final IResource foundResource = new IResource() {
- @Override
- public InputStream open() {
- return new ByteArrayInputStream(asBytes);
- }
-
- @Override
- public int hashCode() {
- // In case multiple resources are found they will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- // In case multiple resources are found they will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.equals(obj);
- }
-
- @Override
- public String toString() {
- return "Solr config resource: " + resourceName;
- }
- };
-
- return new IResource[] { foundResource };
- }
-
- @Override
- public int hashCode() {
- // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.equals(obj);
- }
-
- @Override
- public String toString() {
- String configDir = "";
- try {
- configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
- } catch (Exception ignored) {
- // If we get the exception, the resource loader implementation
- // probably does not support getConfigDir(). Not a big problem.
- }
-
- return "SolrResourceLocator, " + configDir
- + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
- }
- }
-
- @Override
- @Deprecated
- public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
- SolrIndexSearcher searcher = sreq.getSearcher();
- SolrDocumentList solrDocList;
- try {
- Map docIds = new HashMap(docList.size());
- solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
- return cluster(query, solrDocList, docIds, sreq);
- } catch (IOException e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
- }
-
- @Override
- public Object cluster(Query query, SolrDocumentList solrDocList,
- Map docIds, SolrQueryRequest sreq) {
- try {
- // Prepare attributes for Carrot2 clustering call
- Map attributes = new HashMap();
- List documents = getDocuments(solrDocList, docIds, query, sreq);
- attributes.put(AttributeNames.DOCUMENTS, documents);
- attributes.put(AttributeNames.QUERY, query.toString());
-
- // Pass the fields on which clustering runs to the
- // SolrStopwordsCarrot2LexicalDataFactory
- attributes.put("solrFieldNames", getFieldsForClustering(sreq));
-
- // Pass extra overriding attributes from the request, if any
- extractCarrotAttributes(sreq.getParams(), attributes);
-
- // Perform clustering and convert to named list
- // Carrot2 uses current thread's context class loader to get
- // certain classes (e.g. custom tokenizer/stemmer) at runtime.
- // To make sure classes from contrib JARs are available,
- // we swap the context class loader for the time of clustering.
- Thread ct = Thread.currentThread();
- ClassLoader prev = ct.getContextClassLoader();
- try {
- ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
- return clustersToNamedList(controller.process(attributes,
- clusteringAlgorithmClass).getClusters(), sreq.getParams());
- } finally {
- ct.setContextClassLoader(prev);
- }
- } catch (Exception e) {
- log.error("Carrot2 clustering failed", e);
- throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
- }
- }
-
@Override
@SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
@@ -377,6 +216,43 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
return result;
}
+ @Override
+ public Object cluster(Query query, SolrDocumentList solrDocList,
+ Map docIds, SolrQueryRequest sreq) {
+ try {
+ // Prepare attributes for Carrot2 clustering call
+ Map attributes = new HashMap();
+ List documents = getDocuments(solrDocList, docIds, query, sreq);
+ attributes.put(AttributeNames.DOCUMENTS, documents);
+ attributes.put(AttributeNames.QUERY, query.toString());
+
+ // Pass the fields on which clustering runs.
+ attributes.put("solrFieldNames", getFieldsForClustering(sreq));
+
+ // Pass extra overriding attributes from the request, if any
+ extractCarrotAttributes(sreq.getParams(), attributes);
+
+ // Perform clustering and convert to an output structure of clusters.
+ //
+ // Carrot2 uses current thread's context class loader to get
+ // certain classes (e.g. custom tokenizer/stemmer) at runtime.
+ // To make sure classes from contrib JARs are available,
+ // we swap the context class loader for the time of clustering.
+ Thread ct = Thread.currentThread();
+ ClassLoader prev = ct.getContextClassLoader();
+ try {
+ ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+ return clustersToNamedList(controller.process(attributes,
+ clusteringAlgorithmClass).getClusters(), sreq.getParams());
+ } finally {
+ ct.setContextClassLoader(prev);
+ }
+ } catch (Exception e) {
+ log.error("Carrot2 clustering failed", e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
+ }
+ }
+
@Override
protected Set getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams();
@@ -434,8 +310,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Parse language code map string into a map
Map languageCodeMap = Maps.newHashMap();
if (StringUtils.isNotBlank(languageField)) {
- for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "")
- .split("[, ]")) {
+ for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
index e497f3d25c6..beebb91aab4 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
@@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableSet;
/**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
+ * @lucene.experimental
*/
public final class CarrotParams {
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java
index 5b8332d9513..8db24d0a593 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java
@@ -48,6 +48,8 @@ import org.tartarus.snowball.ext.TurkishStemmer;
* An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's
* APIs. Should the relevant Lucene APIs need to change, the changes can be made
* in this class.
+ *
+ * @lucene.experimental
*/
public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
final static Logger logger = org.slf4j.LoggerFactory
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
index 998c6b71a5b..88c3b737e3b 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
@@ -38,6 +38,8 @@ import org.slf4j.Logger;
* Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in
* classpath at runtime, the default Carrot2's tokenizer is used. Should the
* Lucene APIs need to change, the changes can be made in this class.
+ *
+ * @lucene.experimental
*/
public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory {
final static Logger logger = org.slf4j.LoggerFactory
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java
new file mode 100644
index 00000000000..1cf28e79ba4
--- /dev/null
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler.clustering.carrot2;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.carrot2.util.resource.IResource;
+import org.carrot2.util.resource.IResourceLocator;
+
+/**
+ * An {@link IResourceLocator} that delegates resource searches to {@link SolrCore}.
+ *
+ * @lucene.experimental
+ */
+class SolrResourceLocator implements IResourceLocator {
+ private final SolrResourceLoader resourceLoader;
+ private final String carrot2ResourcesDir;
+
+ public SolrResourceLocator(SolrCore core, SolrParams initParams) {
+ resourceLoader = core.getResourceLoader();
+
+ @SuppressWarnings("deprecation")
+ String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
+ String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
+ carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CarrotClusteringEngine.CARROT_RESOURCES_PREFIX);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> T firstNonNull(T... args) {
+ for (T t : args) {
+ if (t != null) return t;
+ }
+ throw new NullPointerException("At least one element has to be non-null.");
+ }
+
+ @Override
+ public IResource[] getAll(final String resource) {
+ final String resourceName = carrot2ResourcesDir + "/" + resource;
+ CarrotClusteringEngine.log.debug("Looking for Solr resource: " + resourceName);
+
+ InputStream resourceStream = null;
+ final byte [] asBytes;
+ try {
+ resourceStream = resourceLoader.openResource(resourceName);
+ asBytes = IOUtils.toByteArray(resourceStream);
+ } catch (IOException e) {
+ CarrotClusteringEngine.log.debug("Resource not found in Solr's config: " + resourceName
+ + ". Using the default " + resource + " from Carrot JAR.");
+ return new IResource[] {};
+ } finally {
+ if (resourceStream != null) {
+ try {
+ resourceStream.close();
+ } catch (IOException e) {
+ // ignore.
+ }
+ }
+ }
+
+ CarrotClusteringEngine.log.info("Loaded Solr resource: " + resourceName);
+
+ final IResource foundResource = new IResource() {
+ @Override
+ public InputStream open() {
+ return new ByteArrayInputStream(asBytes);
+ }
+
+ @Override
+ public int hashCode() {
+ // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.equals(obj);
+ }
+
+ @Override
+ public String toString() {
+ return "Solr config resource: " + resourceName;
+ }
+ };
+
+ return new IResource[] { foundResource };
+ }
+
+ @Override
+ public int hashCode() {
+ // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.equals(obj);
+ }
+
+ @Override
+ public String toString() {
+ String configDir = "";
+ try {
+ configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
+ } catch (Exception ignored) {
+ // If we get the exception, the resource loader implementation
+ // probably does not support getConfigDir(). Not a big problem.
+ }
+
+ return "SolrResourceLocator, " + configDir
+ + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
+ }
+}
\ No newline at end of file
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
index 00830203c36..3631c5774de 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
@@ -50,10 +50,11 @@ import com.google.common.collect.Multimap;
* stop words removal. In other words, if something is a stop word during
* indexing, then it should also be a stop word during clustering, but not the
* other way round.
+ *
+ * @lucene.experimental
*/
@Bindable
-public class SolrStopwordsCarrot2LexicalDataFactory implements
- ILexicalDataFactory {
+public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFactory {
final static Logger logger = org.slf4j.LoggerFactory
.getLogger(SolrStopwordsCarrot2LexicalDataFactory.class);
diff --git a/solr/contrib/clustering/src/java/overview.html b/solr/contrib/clustering/src/java/overview.html
index 3bbd2d7f357..59940f65641 100644
--- a/solr/contrib/clustering/src/java/overview.html
+++ b/solr/contrib/clustering/src/java/overview.html
@@ -16,6 +16,6 @@
-->
-Apache Solr Search Server: Clustering contrib
+Apache Solr Search Server: text clustering contrib