mirror of
https://github.com/apache/lucene.git
synced 2025-02-13 13:35:37 +00:00
SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multiple fields because pattern replacement doesn't work correctly
This commit is contained in:
parent
55bfadbce1
commit
995a902d1a
@ -159,6 +159,9 @@ Bug Fixes
|
||||
|
||||
* SOLR-12553: Allow SignificantTerms Query Parser to use local parameters (Alexandre Rafalovitch)
|
||||
|
||||
* SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multiple fields because pattern replacement
|
||||
doesn't work correctly. (Koji Sekiguchi)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
@ -17,8 +17,6 @@
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
@ -57,6 +55,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
|
||||
|
||||
/**
|
||||
* Extracts named entities using an OpenNLP NER <code>modelFile</code> from the values found in
|
||||
* any matching <code>source</code> field into a configured <code>dest</code> field, after
|
||||
@ -500,13 +500,13 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
|
||||
SolrInputField destField = null;
|
||||
String entityName = entity.first();
|
||||
String entityType = entity.second();
|
||||
resolvedDest = resolvedDest.replace(ENTITY_TYPE, entityType);
|
||||
if (doc.containsKey(resolvedDest)) {
|
||||
destField = doc.getField(resolvedDest);
|
||||
final String resolved = resolvedDest.replace(ENTITY_TYPE, entityType);
|
||||
if (doc.containsKey(resolved)) {
|
||||
destField = doc.getField(resolved);
|
||||
} else {
|
||||
SolrInputField targetField = destMap.get(resolvedDest);
|
||||
SolrInputField targetField = destMap.get(resolved);
|
||||
if (targetField == null) {
|
||||
destField = new SolrInputField(resolvedDest);
|
||||
destField = new SolrInputField(resolved);
|
||||
} else {
|
||||
destField = targetField;
|
||||
}
|
||||
@ -514,7 +514,7 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
|
||||
destField.addValue(entityName);
|
||||
|
||||
// put it in map to avoid concurrent modification...
|
||||
destMap.put(resolvedDest, destField);
|
||||
destMap.put(resolved, destField);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
|
||||
f("subtitle", "Ineluctably, Flashman."),
|
||||
f("corrolary_txt", "Forsooth thou bringeth Flashman."),
|
||||
f("notes_txt", "Yes Flashman."),
|
||||
f("summary", "Many aspire to be Flashman."),
|
||||
f("summary", "Many aspire to be Flashman in London."),
|
||||
f("descs", "Courage, Flashman.", "Ain't he Flashman."),
|
||||
f("descriptions", "Flashman. Flashman. Flashman.")));
|
||||
|
||||
@ -91,6 +91,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
|
||||
assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
|
||||
assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
|
||||
assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
|
||||
assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
|
||||
}
|
||||
|
||||
public void testEquivalentExtraction() throws Exception {
|
||||
@ -182,11 +183,13 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
|
||||
public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
|
||||
SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
|
||||
f("foo_x3_x7_s", "Flashman. Whoa.")));
|
||||
f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
|
||||
f("foo_x3_x7_s", "Flashman in London. Whoa.")));
|
||||
|
||||
assertNotNull(d);
|
||||
assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
|
||||
assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
|
||||
assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
|
||||
assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user