mirror of https://github.com/apache/lucene.git
SOLR-5219: follow-up code cleanups.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522472 13f79535-47bb-0310-9956-ffa450edef68
parent 247142e18c
commit 047a650004
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with

@@ -18,7 +19,6 @@ package org.apache.solr.handler.clustering;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;

@@ -38,6 +38,7 @@ import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -46,13 +47,13 @@ import com.google.common.collect.Maps;

/**
 * Provide a plugin for performing cluster analysis. This can either be applied to
 * Provides a plugin for performing cluster analysis. This can either be applied to
 * search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
 * clustering documents (e.g., via <a href="http://mahout.apache.org/">Mahout</a>).
 * <p>
 * This engine is experimental. Output from this engine is subject to change in future releases.</p>
 * <p>
 * See Solr example for configuration examples.</p>
 * See Solr example for configuration examples.</p>
 *
 * @lucene.experimental
 */
public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
  private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);

@@ -78,6 +79,13 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
   */
  private final Map<String, SearchClusteringEngine> searchClusteringEnginesView = Collections.unmodifiableMap(searchClusteringEngines);

  /**
   * Initialization parameters temporarily saved here, the component
   * is initialized in {@link #inform(SolrCore)} because we need to know
   * the core's {@link SolrResourceLoader}.
   *
   * @see #init(NamedList)
   */
  private NamedList<Object> initParams;

  @Override

@@ -150,8 +158,9 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
    SearchClusteringEngine engine = getSearchClusteringEngine(rb);
    if (engine != null) {
      DocListAndSet results = rb.getResults();
      Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
      SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
      Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
      SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
          results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
      Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {

@@ -177,7 +186,7 @@ public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
      }
    }
  }

  private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
    return searchClusteringEngines.get(getClusteringEngineName(rb));
  }
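Note (not part of the patch): the ClusteringComponent hunk above replaces new HashMap<SolrDocument, Integer>(results.docList.size()) with Guava's Maps.newHashMapWithExpectedSize(...) and inlines the DocList conversion through SolrPluginUtils.docListToSolrDocumentList. A minimal, standalone sketch of the map-sizing idiom follows; the class and variable names are illustrative only.

// Hedged sketch (names illustrative): why Maps.newHashMapWithExpectedSize(n) is
// preferred over new HashMap<K,V>(n) when n is the expected number of entries
// rather than a raw table capacity.
import java.util.HashMap;
import java.util.Map;

import com.google.common.collect.Maps;

public class ExpectedSizeSketch {
  public static void main(String[] args) {
    int docCount = 100;

    // Passing 100 here sets the initial *capacity*; with the default 0.75 load
    // factor the table is resized before 100 entries actually fit.
    Map<String, Integer> byCapacity = new HashMap<String, Integer>(docCount);

    // Guava sizes the backing table so docCount entries fit without resizing.
    Map<String, Integer> byExpectedSize = Maps.newHashMapWithExpectedSize(docCount);

    byCapacity.put("a", 1);
    byExpectedSize.put("a", 1);
    System.out.println(byCapacity.size() + " " + byExpectedSize.size());
  }
}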
@@ -18,16 +18,16 @@ package org.apache.solr.handler.clustering;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;

/**
 *
 *
 **/
 * A base class for {@link SearchClusteringEngine} and {@link DocumentClusteringEngine}.
 * @lucene.experimental
 */
public class ClusteringEngine {
  private String name;
  public static final String ENGINE_NAME = "name";
  public static final String DEFAULT_ENGINE_NAME = "default";

  private String name;

  public String init(NamedList<?> config, SolrCore core) {
    name = (String) config.get(ENGINE_NAME);
    return name;
@@ -16,11 +16,9 @@ package org.apache.solr.handler.clustering;
 * limitations under the License.
 */

/**
 *
 *
 **/
 * @lucene.experimental
 */
public interface ClusteringParams {

  public static final String CLUSTERING_PREFIX = "clustering.";

@@ -30,8 +28,9 @@ public interface ClusteringParams {
  public static final String USE_SEARCH_RESULTS = CLUSTERING_PREFIX + "results";

  public static final String USE_COLLECTION = CLUSTERING_PREFIX + "collection";

  /**
   * When document clustering, cluster on the Doc Set
   * When clustering full documents, cluster on the Doc Set.
   */
  public static final String USE_DOC_SET = CLUSTERING_PREFIX + "docs.useDocSet";
}
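Note (not part of the patch): for orientation, the ClusteringParams constants above expand to the request parameters shown in this hedged sketch; the example query string at the end is an assumption about typical usage of the clustering component, not something taken from this commit.

// Hedged sketch: the request parameters spelled out by ClusteringParams.
public class ClusteringParamsSketch {
  public static void main(String[] args) {
    String prefix = "clustering.";                 // CLUSTERING_PREFIX
    String useSearchResults = prefix + "results";  // cluster the current search results
    String useCollection = prefix + "collection";  // cluster a whole collection
    String useDocSet = prefix + "docs.useDocSet";  // document clustering over the DocSet

    // e.g. a search request enabling result clustering might carry (assumed usage):
    System.out.println("q=*:*&clustering=true&" + useSearchResults + "=true");
  }
}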
@@ -20,11 +20,9 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.search.DocSet;

/**
 * Experimental. Subject to change before the next release.
 *
 **/
 * @lucene.experimental
 */
public abstract class DocumentClusteringEngine extends ClusteringEngine {

  /**
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with

@@ -16,7 +17,6 @@ package org.apache.solr.handler.clustering;
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Map;
import java.util.Set;

@@ -24,20 +24,19 @@ import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.util.SolrPluginUtils;

import org.apache.solr.response.SolrQueryResponse;

/**
 *
 *
 **/
 * Base class for clustering engines performing cluster analysis on search
 * results.
 *
 * @lucene.experimental
 */
public abstract class SearchClusteringEngine extends ClusteringEngine {

  @Deprecated
  public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);

  // TODO: need DocList, too?
  /**
   * Do the clustering, return a clusters structure to be appended to
   * {@link SolrQueryResponse}.
   */
  public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
      Map<SolrDocument,Integer> docIds, SolrQueryRequest sreq);

@@ -45,15 +44,10 @@ public abstract class SearchClusteringEngine extends ClusteringEngine {
   * Returns the set of field names to load.
   * Concrete classes can override this method if needed.
   * Default implementation returns null, that is, all stored fields are loaded.
   * @return set of field names to load
   *
   * @return The set of field names to load.
   */
  protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
    return null;
  }

  public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
      Map<SolrDocument, Integer> docIds) throws IOException{
    return SolrPluginUtils.docListToSolrDocumentList(
        docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
  }
}
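Note (not part of the patch): a hedged sketch of what a minimal SearchClusteringEngine subclass could look like against the API shown above. MockClusteringEngine and its single-cluster output shape are illustrative assumptions; only the overridden method signatures come from the diff.

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DocList;

public class MockClusteringEngine extends SearchClusteringEngine {

  @Override
  @Deprecated
  public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
    // Deprecated entry point: convert the DocList and delegate to the new method.
    try {
      Map<SolrDocument, Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
      SolrDocumentList docs = getSolrDocumentList(docList, sreq, docIds);
      return cluster(query, docs, docIds, sreq);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public Object cluster(Query query, SolrDocumentList solrDocumentList,
      Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    // Put everything into one cluster; a real engine returns a richer structure.
    SimpleOrderedMap<Object> cluster = new SimpleOrderedMap<Object>();
    cluster.add("label", "all documents");
    cluster.add("size", solrDocumentList.size());
    return Collections.singletonList(cluster);
  }

  @Override
  protected Set<String> getFieldsToLoad(SolrQueryRequest sreq) {
    return null; // null means: load all stored fields
  }
}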
@@ -17,10 +17,7 @@ package org.apache.solr.handler.clustering.carrot2;
 * limitations under the License.
 */

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -32,7 +29,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.Query;
@@ -45,7 +41,6 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.clustering.ClusteringEngine;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.handler.component.HighlightComponent;
@@ -56,7 +51,6 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.SolrPluginUtils;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
@@ -71,7 +65,6 @@ import org.carrot2.util.attribute.AttributeValueSet;
import org.carrot2.util.attribute.AttributeValueSets;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.IResourceLocator;
import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -84,19 +77,17 @@ import com.google.common.collect.Sets;

/**
 * Search results clustering engine based on Carrot2 clustering algorithms.
 * <p/>
 * Output from this class is subject to change.
 *
 * @see "http://project.carrot2.org"
 * @lucene.experimental
 */
public class CarrotClusteringEngine extends SearchClusteringEngine {
  private transient static Logger log = LoggerFactory
      .getLogger(CarrotClusteringEngine.class);
  transient static Logger log = LoggerFactory.getLogger(CarrotClusteringEngine.class);

  /**
   * The subdirectory in Solr config dir to read customized Carrot2 resources from.
   */
  private static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";
  static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";

  /**
   * Name of Carrot2 document's field containing Solr document's identifier.
@@ -114,167 +105,15 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
   * Carrot2 controller that manages instances of clustering algorithms
   */
  private Controller controller = ControllerFactory.createPooling();

  /**
   * {@link IClusteringAlgorithm} class used for actual clustering.
   */
  private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;

  /** Solr core we're bound to. */
  private SolrCore core;

  private static class SolrResourceLocator implements IResourceLocator {
    private final SolrResourceLoader resourceLoader;
    private final String carrot2ResourcesDir;

    public SolrResourceLocator(SolrCore core, SolrParams initParams) {
      resourceLoader = core.getResourceLoader();

      @SuppressWarnings("deprecation")
      String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
      String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
      carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX);
    }

    @SuppressWarnings("unchecked")
    public static <T> T firstNonNull(T... args) {
      for (T t : args) {
        if (t != null) return t;
      }
      throw new NullPointerException("At least one element has to be non-null.");
    }

    @Override
    public IResource[] getAll(final String resource) {
      final String resourceName = carrot2ResourcesDir + "/" + resource;
      log.debug("Looking for Solr resource: " + resourceName);

      InputStream resourceStream = null;
      final byte [] asBytes;
      try {
        resourceStream = resourceLoader.openResource(resourceName);
        asBytes = IOUtils.toByteArray(resourceStream);
      } catch (IOException e) {
        log.debug("Resource not found in Solr's config: " + resourceName
            + ". Using the default " + resource + " from Carrot JAR.");
        return new IResource[] {};
      } finally {
        if (resourceStream != null) {
          try {
            resourceStream.close();
          } catch (IOException e) {
            // ignore.
          }
        }
      }

      log.info("Loaded Solr resource: " + resourceName);

      final IResource foundResource = new IResource() {
        @Override
        public InputStream open() {
          return new ByteArrayInputStream(asBytes);
        }

        @Override
        public int hashCode() {
          // In case multiple resources are found they will be deduped, but we don't use it in Solr,
          // so simply rely on instance equivalence.
          return super.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
          // In case multiple resources are found they will be deduped, but we don't use it in Solr,
          // so simply rely on instance equivalence.
          return super.equals(obj);
        }

        @Override
        public String toString() {
          return "Solr config resource: " + resourceName;
        }
      };

      return new IResource[] { foundResource };
    }

    @Override
    public int hashCode() {
      // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
      // so simply rely on instance equivalence.
      return super.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
      // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
      // so simply rely on instance equivalence.
      return super.equals(obj);
    }

    @Override
    public String toString() {
      String configDir = "";
      try {
        configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
      } catch (Exception ignored) {
        // If we get the exception, the resource loader implementation
        // probably does not support getConfigDir(). Not a big problem.
      }

      return "SolrResourceLocator, " + configDir
          + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
    }
  }

  @Override
  @Deprecated
  public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
    SolrIndexSearcher searcher = sreq.getSearcher();
    SolrDocumentList solrDocList;
    try {
      Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
      solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
      return cluster(query, solrDocList, docIds, sreq);
    } catch (IOException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
  }

  @Override
  public Object cluster(Query query, SolrDocumentList solrDocList,
      Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
      // Prepare attributes for Carrot2 clustering call
      Map<String, Object> attributes = new HashMap<String, Object>();
      List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
      attributes.put(AttributeNames.DOCUMENTS, documents);
      attributes.put(AttributeNames.QUERY, query.toString());

      // Pass the fields on which clustering runs to the
      // SolrStopwordsCarrot2LexicalDataFactory
      attributes.put("solrFieldNames", getFieldsForClustering(sreq));

      // Pass extra overriding attributes from the request, if any
      extractCarrotAttributes(sreq.getParams(), attributes);

      // Perform clustering and convert to named list
      // Carrot2 uses current thread's context class loader to get
      // certain classes (e.g. custom tokenizer/stemmer) at runtime.
      // To make sure classes from contrib JARs are available,
      // we swap the context class loader for the time of clustering.
      Thread ct = Thread.currentThread();
      ClassLoader prev = ct.getContextClassLoader();
      try {
        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
        return clustersToNamedList(controller.process(attributes,
            clusteringAlgorithmClass).getClusters(), sreq.getParams());
      } finally {
        ct.setContextClassLoader(prev);
      }
    } catch (Exception e) {
      log.error("Carrot2 clustering failed", e);
      throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
  }

  @Override
  @SuppressWarnings("rawtypes")
  public String init(NamedList config, final SolrCore core) {
@@ -377,6 +216,43 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
    return result;
  }

  @Override
  public Object cluster(Query query, SolrDocumentList solrDocList,
      Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
      // Prepare attributes for Carrot2 clustering call
      Map<String, Object> attributes = new HashMap<String, Object>();
      List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
      attributes.put(AttributeNames.DOCUMENTS, documents);
      attributes.put(AttributeNames.QUERY, query.toString());

      // Pass the fields on which clustering runs.
      attributes.put("solrFieldNames", getFieldsForClustering(sreq));

      // Pass extra overriding attributes from the request, if any
      extractCarrotAttributes(sreq.getParams(), attributes);

      // Perform clustering and convert to an output structure of clusters.
      //
      // Carrot2 uses current thread's context class loader to get
      // certain classes (e.g. custom tokenizer/stemmer) at runtime.
      // To make sure classes from contrib JARs are available,
      // we swap the context class loader for the time of clustering.
      Thread ct = Thread.currentThread();
      ClassLoader prev = ct.getContextClassLoader();
      try {
        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
        return clustersToNamedList(controller.process(attributes,
            clusteringAlgorithmClass).getClusters(), sreq.getParams());
      } finally {
        ct.setContextClassLoader(prev);
      }
    } catch (Exception e) {
      log.error("Carrot2 clustering failed", e);
      throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
  }

  @Override
  protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
    SolrParams solrParams = sreq.getParams();
@@ -434,8 +310,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
    // Parse language code map string into a map
    Map<String, String> languageCodeMap = Maps.newHashMap();
    if (StringUtils.isNotBlank(languageField)) {
      for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "")
          .split("[, ]")) {
      for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
        final String[] split = pair.split(":");
        if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
          languageCodeMap.put(split[0], split[1]);
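Note (not part of the patch): the cluster(...) method above swaps the thread context class loader around the Carrot2 call so that classes from contrib JARs (custom tokenizers, stemmers) resolve at runtime. A generic, hedged sketch of that pattern follows; the helper class name and Callable-based shape are assumptions, not Solr API.

// Hedged sketch: run a task with a given ClassLoader installed as the thread
// context loader, always restoring the previous loader afterwards.
import java.util.concurrent.Callable;

public final class ContextClassLoaderSwap {

  public static <T> T withContextClassLoader(ClassLoader loader, Callable<T> task) throws Exception {
    Thread current = Thread.currentThread();
    ClassLoader previous = current.getContextClassLoader();
    try {
      current.setContextClassLoader(loader);    // make contrib/plugin classes visible
      return task.call();
    } finally {
      current.setContextClassLoader(previous);  // always restore the original loader
    }
  }

  public static void main(String[] args) throws Exception {
    ClassLoader pluginLoader = ContextClassLoaderSwap.class.getClassLoader();
    String seen = withContextClassLoader(pluginLoader, new Callable<String>() {
      @Override
      public String call() {
        return String.valueOf(Thread.currentThread().getContextClassLoader());
      }
    });
    System.out.println(seen);
  }
}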
@@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableSet;

/**
 * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
 * @lucene.experimental
 */
public final class CarrotParams {

@@ -48,6 +48,8 @@ import org.tartarus.snowball.ext.TurkishStemmer;
 * An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's
 * APIs. Should the relevant Lucene APIs need to change, the changes can be made
 * in this class.
 *
 * @lucene.experimental
 */
public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
  final static Logger logger = org.slf4j.LoggerFactory
@@ -38,6 +38,8 @@ import org.slf4j.Logger;
 * Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in
 * classpath at runtime, the default Carrot2's tokenizer is used. Should the
 * Lucene APIs need to change, the changes can be made in this class.
 *
 * @lucene.experimental
 */
public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory {
  final static Logger logger = org.slf4j.LoggerFactory
@@ -0,0 +1,140 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.clustering.carrot2;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.IOUtils;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.IResourceLocator;

/**
 * A {@link IResourceLocator} that delegates resource searches to {@link SolrCore}.
 *
 * @lucene.experimental
 */
class SolrResourceLocator implements IResourceLocator {
  private final SolrResourceLoader resourceLoader;
  private final String carrot2ResourcesDir;

  public SolrResourceLocator(SolrCore core, SolrParams initParams) {
    resourceLoader = core.getResourceLoader();

    @SuppressWarnings("deprecation")
    String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
    String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
    carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CarrotClusteringEngine.CARROT_RESOURCES_PREFIX);
  }

  @SuppressWarnings("unchecked")
  public static <T> T firstNonNull(T... args) {
    for (T t : args) {
      if (t != null) return t;
    }
    throw new NullPointerException("At least one element has to be non-null.");
  }

  @Override
  public IResource[] getAll(final String resource) {
    final String resourceName = carrot2ResourcesDir + "/" + resource;
    CarrotClusteringEngine.log.debug("Looking for Solr resource: " + resourceName);

    InputStream resourceStream = null;
    final byte [] asBytes;
    try {
      resourceStream = resourceLoader.openResource(resourceName);
      asBytes = IOUtils.toByteArray(resourceStream);
    } catch (IOException e) {
      CarrotClusteringEngine.log.debug("Resource not found in Solr's config: " + resourceName
          + ". Using the default " + resource + " from Carrot JAR.");
      return new IResource[] {};
    } finally {
      if (resourceStream != null) {
        try {
          resourceStream.close();
        } catch (IOException e) {
          // ignore.
        }
      }
    }

    CarrotClusteringEngine.log.info("Loaded Solr resource: " + resourceName);

    final IResource foundResource = new IResource() {
      @Override
      public InputStream open() {
        return new ByteArrayInputStream(asBytes);
      }

      @Override
      public int hashCode() {
        // In case multiple resources are found they will be deduped, but we don't use it in Solr,
        // so simply rely on instance equivalence.
        return super.hashCode();
      }

      @Override
      public boolean equals(Object obj) {
        // In case multiple resources are found they will be deduped, but we don't use it in Solr,
        // so simply rely on instance equivalence.
        return super.equals(obj);
      }

      @Override
      public String toString() {
        return "Solr config resource: " + resourceName;
      }
    };

    return new IResource[] { foundResource };
  }

  @Override
  public int hashCode() {
    // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
    // so simply rely on instance equivalence.
    return super.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
    // so simply rely on instance equivalence.
    return super.equals(obj);
  }

  @Override
  public String toString() {
    String configDir = "";
    try {
      configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
    } catch (Exception ignored) {
      // If we get the exception, the resource loader implementation
      // probably does not support getConfigDir(). Not a big problem.
    }

    return "SolrResourceLocator, " + configDir
        + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
  }
}
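Note (not part of the patch): a hedged sketch of the varargs firstNonNull(...) fallback chain the new SolrResourceLocator constructor relies on: prefer the CarrotParams.RESOURCES_DIR setting, fall back to the deprecated CarrotParams.LEXICAL_RESOURCES_DIR, then to the built-in default directory. The literal values below are illustrative, not taken from a real configuration.

// Hedged sketch: pick the first non-null value from an ordered list of fallbacks.
public class FirstNonNullSketch {

  @SafeVarargs
  public static <T> T firstNonNull(T... args) {
    for (T t : args) {
      if (t != null) return t;
    }
    throw new NullPointerException("At least one element has to be non-null.");
  }

  public static void main(String[] args) {
    String resourcesDir = null;                     // new setting not configured
    String lexicalResourcesDir = "clustering/old";  // deprecated setting still present
    String defaultDir = "clustering/carrot2";       // CARROT_RESOURCES_PREFIX default

    // The first non-null value wins: here the deprecated setting is used.
    System.out.println(firstNonNull(resourcesDir, lexicalResourcesDir, defaultDir));
  }
}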
@@ -50,10 +50,11 @@ import com.google.common.collect.Multimap;
 * stop words removal. In other words, if something is a stop word during
 * indexing, then it should also be a stop word during clustering, but not the
 * other way round.
 *
 * @lucene.experimental
 */
@Bindable
public class SolrStopwordsCarrot2LexicalDataFactory implements
    ILexicalDataFactory {
public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFactory {
  final static Logger logger = org.slf4j.LoggerFactory
      .getLogger(SolrStopwordsCarrot2LexicalDataFactory.class);

@@ -16,6 +16,6 @@
-->
<html>
<body>
Apache Solr Search Server: Clustering contrib
Apache Solr Search Server: text clustering contrib
</body>
</html>