SOLR-3967: langid.enforceSchema option checks source field instead of target field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440226 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jan Høydahl 2013-01-30 00:26:39 +00:00
parent 606eec0b99
commit d6317b5e68
3 changed files with 22 additions and 14 deletions

View File

@ -95,6 +95,8 @@ Bug Fixes
* SOLR-4342: Fix DataImportHandler stats to be a prper Map (hossman)
* SOLR-3967: langid.enforceSchema option checks source field instead of target field (janhoy)
Optimizations
----------------------

View File

@ -222,10 +222,6 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
log.debug("Mapping field "+fieldName+" using document global language "+fieldLang);
}
String mappedOutputField = getMappedField(fieldName, fieldLang);
if(enforceSchema && schema.getFieldOrNull(fieldName) == null) {
log.warn("Unsuccessful field name mapping to {}, field does not exist, skipping mapping.", mappedOutputField, fieldName);
mappedOutputField = fieldName;
}
if (mappedOutputField != null) {
log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang);
@ -350,17 +346,23 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
/**
* Returns the name of the field to map the current contents into, so that they are properly analyzed. For instance
* if the currentField is "text" and the code is "en", the new field would be "text_en". If such a field doesn't exist,
* then null is returned.
* if the currentField is "text" and the code is "en", the new field would by default be "text_en".
* This method also performs custom regex pattern replace if configured. If enforceSchema=true
* and the resulting field name doesn't exist, then null is returned.
*
* @param currentField The current field name
* @param language the language code
* @return The new schema field name, based on pattern and replace
* @return The new schema field name, based on pattern and replace, or null if illegal
*/
protected String getMappedField(String currentField, String language) {
String lc = lcMap.containsKey(language) ? lcMap.get(language) : language;
String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
return null;
} else {
log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
}
return newFieldName;
}

View File

@ -93,7 +93,7 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
parameters = new ModifiableSolrParams();
parameters.add("langid.fl", "name");
parameters.add("langid.map.lcmap", "jp:s zh:cjk ko:cjk");
parameters.add("langid.enforceSchema", "true");
parameters.set("langid.enforceSchema", "false");
liProcessor = createLangIdProcessor(parameters);
assertEquals("test_no", liProcessor.getMappedField("test", "no"));
@ -102,13 +102,17 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
assertEquals("test_cjk", liProcessor.getMappedField("test", "zh"));
assertEquals("test_cjk", liProcessor.getMappedField("test", "ko"));
// Prove support for other mapping regex
parameters.add("langid.map.pattern", "text_(.*?)_field");
parameters.add("langid.map.replace", "$1_{lang}Text");
// Test that enforceSchema correctly catches illegal field and returns null
parameters.set("langid.enforceSchema", "true");
liProcessor = createLangIdProcessor(parameters);
assertEquals(null, liProcessor.getMappedField("inputfield", "sv"));
assertEquals("title_noText", liProcessor.getMappedField("text_title_field", "no"));
assertEquals("body_svText", liProcessor.getMappedField("text_body_field", "sv"));
// Prove support for other mapping regex, still with enforceSchema=true
parameters.add("langid.map.pattern", "text_(.*?)_field");
parameters.add("langid.map.replace", "$1_{lang}_s");
liProcessor = createLangIdProcessor(parameters);
assertEquals("title_no_s", liProcessor.getMappedField("text_title_field", "no"));
assertEquals("body_sv_s", liProcessor.getMappedField("text_body_field", "sv"));
}
@Test