diff --git a/solr/contrib/clustering/CHANGES.txt b/solr/contrib/clustering/CHANGES.txt index 1baee1dc05d..ebda2079ba3 100644 --- a/solr/contrib/clustering/CHANGES.txt +++ b/solr/contrib/clustering/CHANGES.txt @@ -9,7 +9,8 @@ CHANGES $Id$ ================== Release 4.0.0-dev ============== -(No Changes) +* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are + respected now (Stanislaw Osinski, Dawid Weiss) ================== Release 3.6.0 ================== diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index edaffe18db5..b086318e99f 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -109,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { private Controller controller = ControllerFactory.createPooling(); private Class clusteringAlgorithmClass; + /** Solr core we're bound to. */ + private SolrCore core; + private static class SolrResourceLocator implements IResourceLocator { private final SolrResourceLoader resourceLoader; private final String carrot2ResourcesDir; @@ -147,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { public InputStream open() throws IOException { return new ByteArrayInputStream(asBytes); } - + @Override public int hashCode() { // In case multiple resources are found they will be deduped, but we don't use it in Solr, @@ -232,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { extractCarrotAttributes(sreq.getParams(), attributes); // Perform clustering and convert to named list - return clustersToNamedList(controller.process(attributes, - clusteringAlgorithmClass).getClusters(), sreq.getParams()); + // Carrot2 uses current thread's context class loader to get + // certain classes (e.g. custom tokenizer/stemmer) at runtime. + // To make sure classes from contrib JARs are available, + // we swap the context class loader for the time of clustering. + Thread ct = Thread.currentThread(); + ClassLoader prev = ct.getContextClassLoader(); + try { + ct.setContextClassLoader(core.getResourceLoader().getClassLoader()); + return clustersToNamedList(controller.process(attributes, + clusteringAlgorithmClass).getClusters(), sreq.getParams()); + } finally { + ct.setContextClassLoader(prev); + } } catch (Exception e) { log.error("Carrot2 clustering failed", e); throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e); @@ -243,6 +257,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { @Override @SuppressWarnings({ "unchecked", "rawtypes" }) public String init(NamedList config, final SolrCore core) { + this.core = core; + String result = super.init(config, core); final SolrParams initParams = SolrParams.toSolrParams(config); @@ -277,8 +293,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { // Using the class loader directly because this time we want to omit the prefix new ClassLoaderLocator(core.getResourceLoader().getClassLoader()))); - this.controller.init(initAttributes); - + // Carrot2 uses current thread's context class loader to get + // certain classes (e.g. custom tokenizer/stemmer) at initialization time. + // To make sure classes from contrib JARs are available, + // we swap the context class loader for the time of clustering. + Thread ct = Thread.currentThread(); + ClassLoader prev = ct.getContextClassLoader(); + try { + ct.setContextClassLoader(core.getResourceLoader().getClassLoader()); + this.controller.init(initAttributes); + } finally { + ct.setContextClassLoader(prev); + } + SchemaField uniqueField = core.getSchema().getUniqueKeyField(); if (uniqueField == null) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java index 00050476a56..f9d7b757cb1 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java @@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet; */ -public interface CarrotParams { +/** + * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration). + */ +public final class CarrotParams { - String CARROT_PREFIX = "carrot."; + private static String CARROT_PREFIX = "carrot."; - String ALGORITHM = CARROT_PREFIX + "algorithm"; + public static String ALGORITHM = CARROT_PREFIX + "algorithm"; - String TITLE_FIELD_NAME = CARROT_PREFIX + "title"; - String URL_FIELD_NAME = CARROT_PREFIX + "url"; - String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet"; - String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang"; - String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom"; + public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title"; + public static String URL_FIELD_NAME = CARROT_PREFIX + "url"; + public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet"; + public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang"; + public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom"; - String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary"; - String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize"; - String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets"; + public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary"; + public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize"; + public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets"; - String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions"; - String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters"; - String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir"; - String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap"; + public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions"; + public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters"; + public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir"; + public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap"; - public static final Set CARROT_PARAM_NAMES = ImmutableSet.of( - ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME, - PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS, - LEXICAL_RESOURCES_DIR); + static final Set CARROT_PARAM_NAMES = ImmutableSet.of( + ALGORITHM, + + TITLE_FIELD_NAME, + URL_FIELD_NAME, + SNIPPET_FIELD_NAME, + LANGUAGE_FIELD_NAME, + CUSTOM_FIELD_NAME, + + PRODUCE_SUMMARY, + SUMMARY_FRAGSIZE, + SUMMARY_SNIPPETS, + + NUM_DESCRIPTIONS, + OUTPUT_SUB_CLUSTERS, + LEXICAL_RESOURCES_DIR, + LANGUAGE_CODE_MAP); + + /** No instances. */ + private CarrotParams() {} } diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java index 911f03d787d..94502350bcd 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java @@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void solrStopWordsUsedInCarrot2Clustering() throws Exception { + public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); params.set("merge-resources", false); params.set(AttributeUtils.getKey( @@ -220,7 +220,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception { + public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception { ModifiableSolrParams params = new ModifiableSolrParams(); // Force string fields to be used for clustering. Does not make sense // in a real word, but does the job in the test. @@ -239,7 +239,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void highlightingOfMultiValueField() throws Exception { + public void testHighlightingOfMultiValueField() throws Exception { final String snippetWithoutSummary = getLabels(clusterWithHighlighting( false, 30, 3, "multi", 1).get(0)).get(1); assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First")); @@ -256,7 +256,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void concatenatingMultipleFields() throws Exception { + public void testConcatenatingMultipleFields() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading"); params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body"); @@ -271,7 +271,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void highlightingMultipleFields() throws Exception { + public void testHighlightingMultipleFields() throws Exception { final TermQuery query = new TermQuery(new Term("snippet", "content")); final ModifiableSolrParams params = new ModifiableSolrParams(); @@ -297,7 +297,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void oneCarrot2SupportedLanguage() throws Exception { + public void testOneCarrot2SupportedLanguage() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang"); @@ -309,7 +309,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void oneCarrot2SupportedLanguageOfMany() throws Exception { + public void testOneCarrot2SupportedLanguageOfMany() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang"); @@ -321,7 +321,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void languageCodeMapping() throws Exception { + public void testLanguageCodeMapping() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang"); params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl"); @@ -334,7 +334,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void passingOfCustomFields() throws Exception { + public void testPassingOfCustomFields() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield"); params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield"); @@ -353,7 +353,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void customTokenizer() throws Exception { + public void testCustomTokenizer() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.TITLE_FIELD_NAME, "title"); params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet"); @@ -367,7 +367,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { } @Test - public void customStemmer() throws Exception { + public void testCustomStemmer() throws Exception { final ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CarrotParams.TITLE_FIELD_NAME, "title"); params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");