mirror of https://github.com/apache/lucene.git
SOLR-3967: langid.enforceSchema option checks source field instead of target field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440226 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
606eec0b99
commit
d6317b5e68
|
@ -95,6 +95,8 @@ Bug Fixes
|
|||
|
||||
* SOLR-4342: Fix DataImportHandler stats to be a prper Map (hossman)
|
||||
|
||||
* SOLR-3967: langid.enforceSchema option checks source field instead of target field (janhoy)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -222,10 +222,6 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
|
|||
log.debug("Mapping field "+fieldName+" using document global language "+fieldLang);
|
||||
}
|
||||
String mappedOutputField = getMappedField(fieldName, fieldLang);
|
||||
if(enforceSchema && schema.getFieldOrNull(fieldName) == null) {
|
||||
log.warn("Unsuccessful field name mapping to {}, field does not exist, skipping mapping.", mappedOutputField, fieldName);
|
||||
mappedOutputField = fieldName;
|
||||
}
|
||||
|
||||
if (mappedOutputField != null) {
|
||||
log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang);
|
||||
|
@ -350,17 +346,23 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
|
|||
|
||||
/**
|
||||
* Returns the name of the field to map the current contents into, so that they are properly analyzed. For instance
|
||||
* if the currentField is "text" and the code is "en", the new field would be "text_en". If such a field doesn't exist,
|
||||
* then null is returned.
|
||||
* if the currentField is "text" and the code is "en", the new field would by default be "text_en".
|
||||
* This method also performs custom regex pattern replace if configured. If enforceSchema=true
|
||||
* and the resulting field name doesn't exist, then null is returned.
|
||||
*
|
||||
* @param currentField The current field name
|
||||
* @param language the language code
|
||||
* @return The new schema field name, based on pattern and replace
|
||||
* @return The new schema field name, based on pattern and replace, or null if illegal
|
||||
*/
|
||||
protected String getMappedField(String currentField, String language) {
|
||||
String lc = lcMap.containsKey(language) ? lcMap.get(language) : language;
|
||||
String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
|
||||
log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
|
||||
if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
|
||||
log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
|
||||
return null;
|
||||
} else {
|
||||
log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
|
||||
}
|
||||
return newFieldName;
|
||||
}
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
|
|||
parameters = new ModifiableSolrParams();
|
||||
parameters.add("langid.fl", "name");
|
||||
parameters.add("langid.map.lcmap", "jp:s zh:cjk ko:cjk");
|
||||
parameters.add("langid.enforceSchema", "true");
|
||||
parameters.set("langid.enforceSchema", "false");
|
||||
liProcessor = createLangIdProcessor(parameters);
|
||||
|
||||
assertEquals("test_no", liProcessor.getMappedField("test", "no"));
|
||||
|
@ -102,13 +102,17 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
|
|||
assertEquals("test_cjk", liProcessor.getMappedField("test", "zh"));
|
||||
assertEquals("test_cjk", liProcessor.getMappedField("test", "ko"));
|
||||
|
||||
// Prove support for other mapping regex
|
||||
parameters.add("langid.map.pattern", "text_(.*?)_field");
|
||||
parameters.add("langid.map.replace", "$1_{lang}Text");
|
||||
// Test that enforceSchema correctly catches illegal field and returns null
|
||||
parameters.set("langid.enforceSchema", "true");
|
||||
liProcessor = createLangIdProcessor(parameters);
|
||||
assertEquals(null, liProcessor.getMappedField("inputfield", "sv"));
|
||||
|
||||
assertEquals("title_noText", liProcessor.getMappedField("text_title_field", "no"));
|
||||
assertEquals("body_svText", liProcessor.getMappedField("text_body_field", "sv"));
|
||||
// Prove support for other mapping regex, still with enforceSchema=true
|
||||
parameters.add("langid.map.pattern", "text_(.*?)_field");
|
||||
parameters.add("langid.map.replace", "$1_{lang}_s");
|
||||
liProcessor = createLangIdProcessor(parameters);
|
||||
assertEquals("title_no_s", liProcessor.getMappedField("text_title_field", "no"));
|
||||
assertEquals("body_sv_s", liProcessor.getMappedField("text_body_field", "sv"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue