SOLR-3470: Custom Carrot2 tokenizer and stemmer factories overwritten by defaults: fixing class loader issues, minor cleanups (Dawid Weiss)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340939 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Stanisław Osiński 2012-05-21 08:57:31 +00:00
parent a79a14347d
commit 98d15d4e67
4 changed files with 83 additions and 36 deletions

View File

@ -9,7 +9,8 @@ CHANGES
$Id$
================== Release 4.0.0-dev ==============
(No Changes)
* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are
respected now (Stanislaw Osinski, Dawid Weiss)
================== Release 3.6.0 ==================

View File

@ -109,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
private Controller controller = ControllerFactory.createPooling();
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
/** Solr core we're bound to. */
private SolrCore core;
private static class SolrResourceLocator implements IResourceLocator {
private final SolrResourceLoader resourceLoader;
private final String carrot2ResourcesDir;
@ -147,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
public InputStream open() throws IOException {
return new ByteArrayInputStream(asBytes);
}
@Override
public int hashCode() {
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
@ -232,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
extractCarrotAttributes(sreq.getParams(), attributes);
// Perform clustering and convert to named list
return clustersToNamedList(controller.process(attributes,
clusteringAlgorithmClass).getClusters(), sreq.getParams());
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at runtime.
// To make sure classes from contrib JARs are available,
// we swap the context class loader for the time of clustering.
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
return clustersToNamedList(controller.process(attributes,
clusteringAlgorithmClass).getClusters(), sreq.getParams());
} finally {
ct.setContextClassLoader(prev);
}
} catch (Exception e) {
log.error("Carrot2 clustering failed", e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
@ -243,6 +257,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
@Override
@SuppressWarnings({ "unchecked", "rawtypes" })
public String init(NamedList config, final SolrCore core) {
this.core = core;
String result = super.init(config, core);
final SolrParams initParams = SolrParams.toSolrParams(config);
@ -277,8 +293,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Using the class loader directly because this time we want to omit the prefix
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
this.controller.init(initAttributes);
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
// To make sure classes from contrib JARs are available,
// we swap the context class loader for the time of clustering.
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
this.controller.init(initAttributes);
} finally {
ct.setContextClassLoader(prev);
}
SchemaField uniqueField = core.getSchema().getUniqueKeyField();
if (uniqueField == null) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,

View File

@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet;
*/
public interface CarrotParams {
/**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
*/
public final class CarrotParams {
String CARROT_PREFIX = "carrot.";
private static String CARROT_PREFIX = "carrot.";
String ALGORITHM = CARROT_PREFIX + "algorithm";
public static String ALGORITHM = CARROT_PREFIX + "algorithm";
String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
String URL_FIELD_NAME = CARROT_PREFIX + "url";
String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
public static String URL_FIELD_NAME = CARROT_PREFIX + "url";
public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME,
PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
LEXICAL_RESOURCES_DIR);
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM,
TITLE_FIELD_NAME,
URL_FIELD_NAME,
SNIPPET_FIELD_NAME,
LANGUAGE_FIELD_NAME,
CUSTOM_FIELD_NAME,
PRODUCE_SUMMARY,
SUMMARY_FRAGSIZE,
SUMMARY_SNIPPETS,
NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS,
LEXICAL_RESOURCES_DIR,
LANGUAGE_CODE_MAP);
/** No instances. */
private CarrotParams() {}
}

View File

@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void solrStopWordsUsedInCarrot2Clustering() throws Exception {
public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("merge-resources", false);
params.set(AttributeUtils.getKey(
@ -220,7 +220,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
// Force string fields to be used for clustering. Does not make sense
// in a real word, but does the job in the test.
@ -239,7 +239,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void highlightingOfMultiValueField() throws Exception {
public void testHighlightingOfMultiValueField() throws Exception {
final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
false, 30, 3, "multi", 1).get(0)).get(1);
assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
@ -256,7 +256,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void concatenatingMultipleFields() throws Exception {
public void testConcatenatingMultipleFields() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body");
@ -271,7 +271,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void highlightingMultipleFields() throws Exception {
public void testHighlightingMultipleFields() throws Exception {
final TermQuery query = new TermQuery(new Term("snippet", "content"));
final ModifiableSolrParams params = new ModifiableSolrParams();
@ -297,7 +297,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void oneCarrot2SupportedLanguage() throws Exception {
public void testOneCarrot2SupportedLanguage() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
@ -309,7 +309,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void oneCarrot2SupportedLanguageOfMany() throws Exception {
public void testOneCarrot2SupportedLanguageOfMany() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
@ -321,7 +321,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void languageCodeMapping() throws Exception {
public void testLanguageCodeMapping() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl");
@ -334,7 +334,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void passingOfCustomFields() throws Exception {
public void testPassingOfCustomFields() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield");
params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield");
@ -353,7 +353,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void customTokenizer() throws Exception {
public void testCustomTokenizer() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
@ -367,7 +367,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void customStemmer() throws Exception {
public void testCustomStemmer() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");