SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement doesn't work correctly

This commit is contained in:
koji 2018-07-23 16:58:46 +09:00
parent 55bfadbce1
commit 995a902d1a
3 changed files with 17 additions and 11 deletions

View File

@@ -159,6 +159,9 @@ Bug Fixes
* SOLR-12553: Allow SignificantTerms Query Parser to use local parameters (Alexandre Rafalovitch)
* SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement
doesn't work correctly. (Koji Sekiguchi)
Optimizations
----------------------

View File

@@ -17,8 +17,6 @@
package org.apache.solr.update.processor;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@@ -57,6 +55,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
/**
* Extracts named entities using an OpenNLP NER <code>modelFile</code> from the values found in
* any matching <code>source</code> field into a configured <code>dest</code> field, after
@@ -500,13 +500,13 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
SolrInputField destField = null;
String entityName = entity.first();
String entityType = entity.second();
resolvedDest = resolvedDest.replace(ENTITY_TYPE, entityType);
if (doc.containsKey(resolvedDest)) {
destField = doc.getField(resolvedDest);
final String resolved = resolvedDest.replace(ENTITY_TYPE, entityType);
if (doc.containsKey(resolved)) {
destField = doc.getField(resolved);
} else {
SolrInputField targetField = destMap.get(resolvedDest);
SolrInputField targetField = destMap.get(resolved);
if (targetField == null) {
destField = new SolrInputField(resolvedDest);
destField = new SolrInputField(resolved);
} else {
destField = targetField;
}
@@ -514,7 +514,7 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
destField.addValue(entityName);
// put it in map to avoid concurrent modification...
destMap.put(resolvedDest, destField);
destMap.put(resolved, destField);
}
}
}

View File

@@ -82,7 +82,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
f("subtitle", "Ineluctably, Flashman."),
f("corrolary_txt", "Forsooth thou bringeth Flashman."),
f("notes_txt", "Yes Flashman."),
f("summary", "Many aspire to be Flashman."),
f("summary", "Many aspire to be Flashman in London."),
f("descs", "Courage, Flashman.", "Ain't he Flashman."),
f("descriptions", "Flashman. Flashman. Flashman.")));
@@ -91,6 +91,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
}
public void testEquivalentExtraction() throws Exception {
@@ -182,11 +183,13 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
doc(f("id", "1111"),
f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
f("foo_x3_x7_s", "Flashman. Whoa.")));
f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
f("foo_x3_x7_s", "Flashman in London. Whoa.")));
assertNotNull(d);
assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
}
}