mirror of https://github.com/apache/lucene.git
SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
(Jörg Rathlev, Steve Rowe, Christine Poerschke)
This commit is contained in:
parent
206021578e
commit
adabfdd9c2
|
@ -163,6 +163,9 @@ Bug Fixes
|
|||
* SOLR-10323: fix to SpellingQueryConverter to properly strip out colons in field-specific queries.
|
||||
(Amrit Sarkar via James Dyer)
|
||||
|
||||
* SOLR-10264: Fixes multi-term synonym parsing in ManagedSynonymFilterFactory.
|
||||
(Jörg Rathlev, Steve Rowe, Christine Poerschke)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
|
|||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -358,9 +359,9 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {
|
|||
for (String term : cpsm.mappings.keySet()) {
|
||||
for (String mapping : cpsm.mappings.get(term)) {
|
||||
// apply the case setting to match the behavior of the SynonymMap builder
|
||||
String casedTerm = synonymManager.applyCaseSetting(ignoreCase, term);
|
||||
String casedMapping = synonymManager.applyCaseSetting(ignoreCase, mapping);
|
||||
add(new CharsRef(casedTerm), new CharsRef(casedMapping), false);
|
||||
CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
|
||||
CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
|
||||
add(casedTerm, casedMapping, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -494,7 +494,7 @@
|
|||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
|
||||
<!-- Field type where English stopwords are managed by the REST API -->
|
||||
<fieldType name="managed_en" class="solr.TextField">
|
||||
<fieldType name="managed_en" class="solr.TextField" autoGeneratePhraseQueries="false">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.ManagedStopFilterFactory" managed="english"/>
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.rest.schema.analysis;
|
||||
import java.io.File;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
@ -154,13 +155,30 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
|
|||
"count(/response/lst[@name='field']) = 1",
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'");
|
||||
|
||||
// multi-term synonym logic - SOLR-10264
|
||||
final String multiTermOrigin;
|
||||
final String multiTermSynonym;
|
||||
if (random().nextBoolean()) {
|
||||
multiTermOrigin = "hansestadt hamburg";
|
||||
multiTermSynonym = "hh";
|
||||
} else {
|
||||
multiTermOrigin = "hh";
|
||||
multiTermSynonym = "hansestadt hamburg";
|
||||
}
|
||||
// multi-term logic similar to the angry/mad logic (angry ~ origin, mad ~ synonym)
|
||||
|
||||
assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150"));
|
||||
assertU(adoc(newFieldName, multiTermOrigin+" is in North Germany.", "id", "040"));
|
||||
assertU(commit());
|
||||
|
||||
assertQ("/select?q=" + newFieldName + ":angry",
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
|
||||
"/response/result[@name='response'][@numFound='1']",
|
||||
"/response/result[@name='response']/doc/str[@name='id'][.='5150']");
|
||||
assertQ("/select?q=" + newFieldName + ":"+URLEncoder.encode(multiTermOrigin, "UTF-8"),
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
|
||||
"/response/result[@name='response'][@numFound='1']",
|
||||
"/response/result[@name='response']/doc/str[@name='id'][.='040']");
|
||||
|
||||
// add a mapping that will expand a query for "mad" to match docs with "angry"
|
||||
syns = new HashMap<>();
|
||||
|
@ -172,12 +190,28 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
|
|||
assertJQ(endpoint,
|
||||
"/synonymMappings/managedMap/mad==['angry']");
|
||||
|
||||
// add a mapping that will expand a query for the "synonym" to match docs with the "origin" (either one may be the multi-term phrase)
|
||||
syns = new HashMap<>();
|
||||
syns.put(multiTermSynonym, Arrays.asList(multiTermOrigin));
|
||||
assertJPut(endpoint,
|
||||
JSONUtil.toJSON(syns),
|
||||
"/responseHeader/status==0");
|
||||
|
||||
assertJQ(endpoint+"/"+URLEncoder.encode(multiTermSynonym, "UTF-8"),
|
||||
"/"+multiTermSynonym+"==['"+multiTermOrigin+"']");
|
||||
|
||||
// should not match as the synonym mapping between mad and angry does not
|
||||
// get applied until core reload
|
||||
assertQ("/select?q=" + newFieldName + ":mad",
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
|
||||
"/response/result[@name='response'][@numFound='0']");
|
||||
|
||||
// should not match as the synonym mapping between "origin" and "synonym"
|
||||
// does not get applied until core reload
|
||||
assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
|
||||
"/response/result[@name='response'][@numFound='0']");
|
||||
|
||||
restTestHarness.reload();
|
||||
|
||||
// now query for mad and we should see our test doc
|
||||
|
@ -186,6 +220,12 @@ public class TestManagedSynonymFilterFactory extends RestTestBase {
|
|||
"/response/result[@name='response'][@numFound='1']",
|
||||
"/response/result[@name='response']/doc/str[@name='id'][.='5150']");
|
||||
|
||||
// now query for "synonym" and we should see our test doc with "origin"
|
||||
assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false",
|
||||
"/response/lst[@name='responseHeader']/int[@name='status'] = '0'",
|
||||
"/response/result[@name='response'][@numFound='1']",
|
||||
"/response/result[@name='response']/doc/str[@name='id'][.='040']");
|
||||
|
||||
// test for SOLR-6015
|
||||
syns = new HashMap<>();
|
||||
syns.put("mb", Arrays.asList("megabyte"));
|
||||
|
|
Loading…
Reference in New Issue