diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java index 90437e65330..9d045436bfd 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java @@ -1,4 +1,5 @@ package org.apache.solr.handler.clustering; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -18,7 +19,6 @@ package org.apache.solr.handler.clustering; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Map; @@ -38,6 +38,7 @@ import org.apache.solr.handler.component.ResponseBuilder; import org.apache.solr.handler.component.SearchComponent; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.search.DocListAndSet; +import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,13 +47,13 @@ import com.google.common.collect.Maps; /** - * Provide a plugin for performing cluster analysis. This can either be applied to + * Provides a plugin for performing cluster analysis. This can either be applied to * search results (e.g., via Carrot2) or for * clustering documents (e.g., via Mahout). *

- * This engine is experimental. Output from this engine is subject to change in future releases.

- *

- * See Solr example for configuration examples.

+ * See Solr example for configuration examples.

+ * + * @lucene.experimental */ public class ClusteringComponent extends SearchComponent implements SolrCoreAware { private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class); @@ -78,6 +79,13 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar */ private final Map searchClusteringEnginesView = Collections.unmodifiableMap(searchClusteringEngines); + /** + * Initialization parameters temporarily saved here, the component + * is initialized in {@link #inform(SolrCore)} because we need to know + * the core's {@link SolrResourceLoader}. + * + * @see #init(NamedList) + */ private NamedList initParams; @Override @@ -150,8 +158,9 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar SearchClusteringEngine engine = getSearchClusteringEngine(rb); if (engine != null) { DocListAndSet results = rb.getResults(); - Map docIds = new HashMap(results.docList.size()); - SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds); + Map docIds = Maps.newHashMapWithExpectedSize(results.docList.size()); + SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( + results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds); Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req); rb.rsp.add("clusters", clusters); } else { @@ -177,7 +186,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAwar } } } - + private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){ return searchClusteringEngines.get(getClusteringEngineName(rb)); } diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java index e76323faee4..e8e41af06d4 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java @@ -18,16 +18,16 @@ package org.apache.solr.handler.clustering; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; - /** - * - * - **/ + * A base class for {@link SearchClusteringEngine} and {@link DocumentClusteringEngine}. + * @lucene.experimental + */ public class ClusteringEngine { - private String name; public static final String ENGINE_NAME = "name"; public static final String DEFAULT_ENGINE_NAME = "default"; + private String name; + public String init(NamedList config, SolrCore core) { name = (String) config.get(ENGINE_NAME); return name; diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java index 19ba8ba919b..fc1c3c67d4d 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java @@ -16,11 +16,9 @@ package org.apache.solr.handler.clustering; * limitations under the License. */ - /** - * - * - **/ + * @lucene.experimental + */ public interface ClusteringParams { public static final String CLUSTERING_PREFIX = "clustering."; @@ -30,8 +28,9 @@ public interface ClusteringParams { public static final String USE_SEARCH_RESULTS = CLUSTERING_PREFIX + "results"; public static final String USE_COLLECTION = CLUSTERING_PREFIX + "collection"; + /** - * When document clustering, cluster on the Doc Set + * When clustering full documents, cluster on the Doc Set. */ public static final String USE_DOC_SET = CLUSTERING_PREFIX + "docs.useDocSet"; } diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java index 792e2b7c81a..d56ab7b48df 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java @@ -20,11 +20,9 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.common.params.SolrParams; import org.apache.solr.search.DocSet; - /** - * Experimental. Subject to change before the next release. - * - **/ + * @lucene.experimental + */ public abstract class DocumentClusteringEngine extends ClusteringEngine { /** diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java index 38193a915b1..3fcae7ba2e4 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java @@ -1,4 +1,5 @@ package org.apache.solr.handler.clustering; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,7 +17,6 @@ package org.apache.solr.handler.clustering; * limitations under the License. */ -import java.io.IOException; import java.util.Map; import java.util.Set; @@ -24,20 +24,19 @@ import org.apache.lucene.search.Query; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.search.DocList; -import org.apache.solr.util.SolrPluginUtils; - +import org.apache.solr.response.SolrQueryResponse; /** - * - * - **/ + * Base class for clustering engines performing cluster analysis on search + * results. + * + * @lucene.experimental + */ public abstract class SearchClusteringEngine extends ClusteringEngine { - - @Deprecated - public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq); - - // TODO: need DocList, too? + /** + * Do the clustering, return a clusters structure to be appended to + * {@link SolrQueryResponse}. + */ public abstract Object cluster(Query query, SolrDocumentList solrDocumentList, Map docIds, SolrQueryRequest sreq); @@ -45,15 +44,10 @@ public abstract class SearchClusteringEngine extends ClusteringEngine { * Returns the set of field names to load. * Concrete classes can override this method if needed. * Default implementation returns null, that is, all stored fields are loaded. - * @return set of field names to load + * + * @return The set of field names to load. */ protected Set getFieldsToLoad(SolrQueryRequest sreq){ return null; } - - public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq, - Map docIds) throws IOException{ - return SolrPluginUtils.docListToSolrDocumentList( - docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds); - } } diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index 26e839ee7db..6147fb4956f 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -17,10 +17,7 @@ package org.apache.solr.handler.clustering.carrot2; * limitations under the License. */ -import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -32,7 +29,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.StringUtils; import org.apache.lucene.search.Query; @@ -45,7 +41,6 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.core.SolrCore; -import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.handler.clustering.ClusteringEngine; import org.apache.solr.handler.clustering.SearchClusteringEngine; import org.apache.solr.handler.component.HighlightComponent; @@ -56,7 +51,6 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocList; import org.apache.solr.search.DocSlice; import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.util.SolrPluginUtils; import org.carrot2.core.Cluster; import org.carrot2.core.Controller; import org.carrot2.core.ControllerFactory; @@ -71,7 +65,6 @@ import org.carrot2.util.attribute.AttributeValueSet; import org.carrot2.util.attribute.AttributeValueSets; import org.carrot2.util.resource.ClassLoaderLocator; import org.carrot2.util.resource.IResource; -import org.carrot2.util.resource.IResourceLocator; import org.carrot2.util.resource.ResourceLookup; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -84,19 +77,17 @@ import com.google.common.collect.Sets; /** * Search results clustering engine based on Carrot2 clustering algorithms. - *

- * Output from this class is subject to change. * * @see "http://project.carrot2.org" + * @lucene.experimental */ public class CarrotClusteringEngine extends SearchClusteringEngine { - private transient static Logger log = LoggerFactory - .getLogger(CarrotClusteringEngine.class); + transient static Logger log = LoggerFactory.getLogger(CarrotClusteringEngine.class); /** * The subdirectory in Solr config dir to read customized Carrot2 resources from. */ - private static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2"; + static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2"; /** * Name of Carrot2 document's field containing Solr document's identifier. @@ -114,167 +105,15 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { * Carrot2 controller that manages instances of clustering algorithms */ private Controller controller = ControllerFactory.createPooling(); + + /** + * {@link IClusteringAlgorithm} class used for actual clustering. + */ private Class clusteringAlgorithmClass; /** Solr core we're bound to. */ private SolrCore core; - private static class SolrResourceLocator implements IResourceLocator { - private final SolrResourceLoader resourceLoader; - private final String carrot2ResourcesDir; - - public SolrResourceLocator(SolrCore core, SolrParams initParams) { - resourceLoader = core.getResourceLoader(); - - @SuppressWarnings("deprecation") - String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR); - String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR); - carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX); - } - - @SuppressWarnings("unchecked") - public static T firstNonNull(T... args) { - for (T t : args) { - if (t != null) return t; - } - throw new NullPointerException("At least one element has to be non-null."); - } - - @Override - public IResource[] getAll(final String resource) { - final String resourceName = carrot2ResourcesDir + "/" + resource; - log.debug("Looking for Solr resource: " + resourceName); - - InputStream resourceStream = null; - final byte [] asBytes; - try { - resourceStream = resourceLoader.openResource(resourceName); - asBytes = IOUtils.toByteArray(resourceStream); - } catch (IOException e) { - log.debug("Resource not found in Solr's config: " + resourceName - + ". Using the default " + resource + " from Carrot JAR."); - return new IResource[] {}; - } finally { - if (resourceStream != null) { - try { - resourceStream.close(); - } catch (IOException e) { - // ignore. - } - } - } - - log.info("Loaded Solr resource: " + resourceName); - - final IResource foundResource = new IResource() { - @Override - public InputStream open() { - return new ByteArrayInputStream(asBytes); - } - - @Override - public int hashCode() { - // In case multiple resources are found they will be deduped, but we don't use it in Solr, - // so simply rely on instance equivalence. - return super.hashCode(); - } - - @Override - public boolean equals(Object obj) { - // In case multiple resources are found they will be deduped, but we don't use it in Solr, - // so simply rely on instance equivalence. - return super.equals(obj); - } - - @Override - public String toString() { - return "Solr config resource: " + resourceName; - } - }; - - return new IResource[] { foundResource }; - } - - @Override - public int hashCode() { - // In case multiple locations are used locators will be deduped, but we don't use it in Solr, - // so simply rely on instance equivalence. - return super.hashCode(); - } - - @Override - public boolean equals(Object obj) { - // In case multiple locations are used locators will be deduped, but we don't use it in Solr, - // so simply rely on instance equivalence. - return super.equals(obj); - } - - @Override - public String toString() { - String configDir = ""; - try { - configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", "; - } catch (Exception ignored) { - // If we get the exception, the resource loader implementation - // probably does not support getConfigDir(). Not a big problem. - } - - return "SolrResourceLocator, " + configDir - + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir; - } - } - - @Override - @Deprecated - public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) { - SolrIndexSearcher searcher = sreq.getSearcher(); - SolrDocumentList solrDocList; - try { - Map docIds = new HashMap(docList.size()); - solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds ); - return cluster(query, solrDocList, docIds, sreq); - } catch (IOException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } - } - - @Override - public Object cluster(Query query, SolrDocumentList solrDocList, - Map docIds, SolrQueryRequest sreq) { - try { - // Prepare attributes for Carrot2 clustering call - Map attributes = new HashMap(); - List documents = getDocuments(solrDocList, docIds, query, sreq); - attributes.put(AttributeNames.DOCUMENTS, documents); - attributes.put(AttributeNames.QUERY, query.toString()); - - // Pass the fields on which clustering runs to the - // SolrStopwordsCarrot2LexicalDataFactory - attributes.put("solrFieldNames", getFieldsForClustering(sreq)); - - // Pass extra overriding attributes from the request, if any - extractCarrotAttributes(sreq.getParams(), attributes); - - // Perform clustering and convert to named list - // Carrot2 uses current thread's context class loader to get - // certain classes (e.g. custom tokenizer/stemmer) at runtime. - // To make sure classes from contrib JARs are available, - // we swap the context class loader for the time of clustering. - Thread ct = Thread.currentThread(); - ClassLoader prev = ct.getContextClassLoader(); - try { - ct.setContextClassLoader(core.getResourceLoader().getClassLoader()); - return clustersToNamedList(controller.process(attributes, - clusteringAlgorithmClass).getClusters(), sreq.getParams()); - } finally { - ct.setContextClassLoader(prev); - } - } catch (Exception e) { - log.error("Carrot2 clustering failed", e); - throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e); - } - } - @Override @SuppressWarnings("rawtypes") public String init(NamedList config, final SolrCore core) { @@ -377,6 +216,43 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { return result; } + @Override + public Object cluster(Query query, SolrDocumentList solrDocList, + Map docIds, SolrQueryRequest sreq) { + try { + // Prepare attributes for Carrot2 clustering call + Map attributes = new HashMap(); + List documents = getDocuments(solrDocList, docIds, query, sreq); + attributes.put(AttributeNames.DOCUMENTS, documents); + attributes.put(AttributeNames.QUERY, query.toString()); + + // Pass the fields on which clustering runs. + attributes.put("solrFieldNames", getFieldsForClustering(sreq)); + + // Pass extra overriding attributes from the request, if any + extractCarrotAttributes(sreq.getParams(), attributes); + + // Perform clustering and convert to an output structure of clusters. + // + // Carrot2 uses current thread's context class loader to get + // certain classes (e.g. custom tokenizer/stemmer) at runtime. + // To make sure classes from contrib JARs are available, + // we swap the context class loader for the time of clustering. + Thread ct = Thread.currentThread(); + ClassLoader prev = ct.getContextClassLoader(); + try { + ct.setContextClassLoader(core.getResourceLoader().getClassLoader()); + return clustersToNamedList(controller.process(attributes, + clusteringAlgorithmClass).getClusters(), sreq.getParams()); + } finally { + ct.setContextClassLoader(prev); + } + } catch (Exception e) { + log.error("Carrot2 clustering failed", e); + throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e); + } + } + @Override protected Set getFieldsToLoad(SolrQueryRequest sreq){ SolrParams solrParams = sreq.getParams(); @@ -434,8 +310,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { // Parse language code map string into a map Map languageCodeMap = Maps.newHashMap(); if (StringUtils.isNotBlank(languageField)) { - for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "") - .split("[, ]")) { + for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) { final String[] split = pair.split(":"); if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) { languageCodeMap.put(split[0], split[1]); diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java index e497f3d25c6..beebb91aab4 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java @@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableSet; /** * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration). + * @lucene.experimental */ public final class CarrotParams { diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java index 5b8332d9513..8db24d0a593 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java @@ -48,6 +48,8 @@ import org.tartarus.snowball.ext.TurkishStemmer; * An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's * APIs. Should the relevant Lucene APIs need to change, the changes can be made * in this class. + * + * @lucene.experimental */ public class LuceneCarrot2StemmerFactory implements IStemmerFactory { final static Logger logger = org.slf4j.LoggerFactory diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java index 998c6b71a5b..88c3b737e3b 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java @@ -38,6 +38,8 @@ import org.slf4j.Logger; * Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in * classpath at runtime, the default Carrot2's tokenizer is used. Should the * Lucene APIs need to change, the changes can be made in this class. + * + * @lucene.experimental */ public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory { final static Logger logger = org.slf4j.LoggerFactory diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java new file mode 100644 index 00000000000..1cf28e79ba4 --- /dev/null +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrResourceLocator.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.handler.clustering.carrot2; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.io.IOUtils; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.carrot2.util.resource.IResource; +import org.carrot2.util.resource.IResourceLocator; + +/** + * A {@link IResourceLocator} that delegates resource searches to {@link SolrCore}. + * + * @lucene.experimental + */ +class SolrResourceLocator implements IResourceLocator { + private final SolrResourceLoader resourceLoader; + private final String carrot2ResourcesDir; + + public SolrResourceLocator(SolrCore core, SolrParams initParams) { + resourceLoader = core.getResourceLoader(); + + @SuppressWarnings("deprecation") + String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR); + String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR); + carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CarrotClusteringEngine.CARROT_RESOURCES_PREFIX); + } + + @SuppressWarnings("unchecked") + public static T firstNonNull(T... args) { + for (T t : args) { + if (t != null) return t; + } + throw new NullPointerException("At least one element has to be non-null."); + } + + @Override + public IResource[] getAll(final String resource) { + final String resourceName = carrot2ResourcesDir + "/" + resource; + CarrotClusteringEngine.log.debug("Looking for Solr resource: " + resourceName); + + InputStream resourceStream = null; + final byte [] asBytes; + try { + resourceStream = resourceLoader.openResource(resourceName); + asBytes = IOUtils.toByteArray(resourceStream); + } catch (IOException e) { + CarrotClusteringEngine.log.debug("Resource not found in Solr's config: " + resourceName + + ". Using the default " + resource + " from Carrot JAR."); + return new IResource[] {}; + } finally { + if (resourceStream != null) { + try { + resourceStream.close(); + } catch (IOException e) { + // ignore. + } + } + } + + CarrotClusteringEngine.log.info("Loaded Solr resource: " + resourceName); + + final IResource foundResource = new IResource() { + @Override + public InputStream open() { + return new ByteArrayInputStream(asBytes); + } + + @Override + public int hashCode() { + // In case multiple resources are found they will be deduped, but we don't use it in Solr, + // so simply rely on instance equivalence. + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + // In case multiple resources are found they will be deduped, but we don't use it in Solr, + // so simply rely on instance equivalence. + return super.equals(obj); + } + + @Override + public String toString() { + return "Solr config resource: " + resourceName; + } + }; + + return new IResource[] { foundResource }; + } + + @Override + public int hashCode() { + // In case multiple locations are used locators will be deduped, but we don't use it in Solr, + // so simply rely on instance equivalence. + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + // In case multiple locations are used locators will be deduped, but we don't use it in Solr, + // so simply rely on instance equivalence. + return super.equals(obj); + } + + @Override + public String toString() { + String configDir = ""; + try { + configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", "; + } catch (Exception ignored) { + // If we get the exception, the resource loader implementation + // probably does not support getConfigDir(). Not a big problem. + } + + return "SolrResourceLocator, " + configDir + + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir; + } +} \ No newline at end of file diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java index 00830203c36..3631c5774de 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java @@ -50,10 +50,11 @@ import com.google.common.collect.Multimap; * stop words removal. In other words, if something is a stop word during * indexing, then it should also be a stop word during clustering, but not the * other way round. + * + * @lucene.experimental */ @Bindable -public class SolrStopwordsCarrot2LexicalDataFactory implements - ILexicalDataFactory { +public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFactory { final static Logger logger = org.slf4j.LoggerFactory .getLogger(SolrStopwordsCarrot2LexicalDataFactory.class); diff --git a/solr/contrib/clustering/src/java/overview.html b/solr/contrib/clustering/src/java/overview.html index 3bbd2d7f357..59940f65641 100644 --- a/solr/contrib/clustering/src/java/overview.html +++ b/solr/contrib/clustering/src/java/overview.html @@ -16,6 +16,6 @@ --> -Apache Solr Search Server: Clustering contrib +Apache Solr Search Server: text clustering contrib