mirror of https://github.com/apache/lucene.git
SOLR-3470: Custom Carrot2 tokenizer and stemmer factories overwritten by defaults: fixing class loader issues, minor cleanups (Dawid Weiss)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340939 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a79a14347d
commit
98d15d4e67
|
@ -9,7 +9,8 @@ CHANGES
|
|||
$Id$
|
||||
================== Release 4.0.0-dev ==============
|
||||
|
||||
(No Changes)
|
||||
* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are
|
||||
respected now (Stanislaw Osinski, Dawid Weiss)
|
||||
|
||||
================== Release 3.6.0 ==================
|
||||
|
||||
|
|
|
@ -109,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
private Controller controller = ControllerFactory.createPooling();
|
||||
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
|
||||
|
||||
/** Solr core we're bound to. */
|
||||
private SolrCore core;
|
||||
|
||||
private static class SolrResourceLocator implements IResourceLocator {
|
||||
private final SolrResourceLoader resourceLoader;
|
||||
private final String carrot2ResourcesDir;
|
||||
|
@ -147,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
public InputStream open() throws IOException {
|
||||
return new ByteArrayInputStream(asBytes);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
|
||||
|
@ -232,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
extractCarrotAttributes(sreq.getParams(), attributes);
|
||||
|
||||
// Perform clustering and convert to named list
|
||||
return clustersToNamedList(controller.process(attributes,
|
||||
clusteringAlgorithmClass).getClusters(), sreq.getParams());
|
||||
// Carrot2 uses current thread's context class loader to get
|
||||
// certain classes (e.g. custom tokenizer/stemmer) at runtime.
|
||||
// To make sure classes from contrib JARs are available,
|
||||
// we swap the context class loader for the time of clustering.
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
|
||||
return clustersToNamedList(controller.process(attributes,
|
||||
clusteringAlgorithmClass).getClusters(), sreq.getParams());
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Carrot2 clustering failed", e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
|
||||
|
@ -243,6 +257,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
@Override
|
||||
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||
public String init(NamedList config, final SolrCore core) {
|
||||
this.core = core;
|
||||
|
||||
String result = super.init(config, core);
|
||||
final SolrParams initParams = SolrParams.toSolrParams(config);
|
||||
|
||||
|
@ -277,8 +293,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Using the class loader directly because this time we want to omit the prefix
|
||||
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
|
||||
|
||||
this.controller.init(initAttributes);
|
||||
|
||||
// Carrot2 uses current thread's context class loader to get
|
||||
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
|
||||
// To make sure classes from contrib JARs are available,
|
||||
// we swap the context class loader for the time of clustering.
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
|
||||
this.controller.init(initAttributes);
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
|
||||
SchemaField uniqueField = core.getSchema().getUniqueKeyField();
|
||||
if (uniqueField == null) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
|
|
|
@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet;
|
|||
*/
|
||||
|
||||
|
||||
public interface CarrotParams {
|
||||
/**
|
||||
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
|
||||
*/
|
||||
public final class CarrotParams {
|
||||
|
||||
String CARROT_PREFIX = "carrot.";
|
||||
private static String CARROT_PREFIX = "carrot.";
|
||||
|
||||
String ALGORITHM = CARROT_PREFIX + "algorithm";
|
||||
public static String ALGORITHM = CARROT_PREFIX + "algorithm";
|
||||
|
||||
String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
|
||||
String URL_FIELD_NAME = CARROT_PREFIX + "url";
|
||||
String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
|
||||
String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
|
||||
String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
|
||||
public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
|
||||
public static String URL_FIELD_NAME = CARROT_PREFIX + "url";
|
||||
public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
|
||||
public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
|
||||
public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
|
||||
|
||||
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
|
||||
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
|
||||
String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
|
||||
public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
|
||||
public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
|
||||
public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
|
||||
|
||||
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
|
||||
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
|
||||
|
||||
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME,
|
||||
PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
|
||||
LEXICAL_RESOURCES_DIR);
|
||||
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM,
|
||||
|
||||
TITLE_FIELD_NAME,
|
||||
URL_FIELD_NAME,
|
||||
SNIPPET_FIELD_NAME,
|
||||
LANGUAGE_FIELD_NAME,
|
||||
CUSTOM_FIELD_NAME,
|
||||
|
||||
PRODUCE_SUMMARY,
|
||||
SUMMARY_FRAGSIZE,
|
||||
SUMMARY_SNIPPETS,
|
||||
|
||||
NUM_DESCRIPTIONS,
|
||||
OUTPUT_SUB_CLUSTERS,
|
||||
LEXICAL_RESOURCES_DIR,
|
||||
LANGUAGE_CODE_MAP);
|
||||
|
||||
/** No instances. */
|
||||
private CarrotParams() {}
|
||||
}
|
||||
|
|
|
@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void solrStopWordsUsedInCarrot2Clustering() throws Exception {
|
||||
public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set("merge-resources", false);
|
||||
params.set(AttributeUtils.getKey(
|
||||
|
@ -220,7 +220,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
|
||||
public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
// Force string fields to be used for clustering. Does not make sense
|
||||
// in a real word, but does the job in the test.
|
||||
|
@ -239,7 +239,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void highlightingOfMultiValueField() throws Exception {
|
||||
public void testHighlightingOfMultiValueField() throws Exception {
|
||||
final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
|
||||
false, 30, 3, "multi", 1).get(0)).get(1);
|
||||
assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
|
||||
|
@ -256,7 +256,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void concatenatingMultipleFields() throws Exception {
|
||||
public void testConcatenatingMultipleFields() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body");
|
||||
|
@ -271,7 +271,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void highlightingMultipleFields() throws Exception {
|
||||
public void testHighlightingMultipleFields() throws Exception {
|
||||
final TermQuery query = new TermQuery(new Term("snippet", "content"));
|
||||
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
@ -297,7 +297,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void oneCarrot2SupportedLanguage() throws Exception {
|
||||
public void testOneCarrot2SupportedLanguage() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
|
||||
|
@ -309,7 +309,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void oneCarrot2SupportedLanguageOfMany() throws Exception {
|
||||
public void testOneCarrot2SupportedLanguageOfMany() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
|
||||
|
@ -321,7 +321,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void languageCodeMapping() throws Exception {
|
||||
public void testLanguageCodeMapping() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl");
|
||||
|
@ -334,7 +334,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void passingOfCustomFields() throws Exception {
|
||||
public void testPassingOfCustomFields() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield");
|
||||
params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield");
|
||||
|
@ -353,7 +353,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void customTokenizer() throws Exception {
|
||||
public void testCustomTokenizer() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
|
||||
|
@ -367,7 +367,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void customStemmer() throws Exception {
|
||||
public void testCustomStemmer() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
|
||||
|
|
Loading…
Reference in New Issue