diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bf1cabaadef..54d9b2f2111 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -86,6 +86,9 @@ Other Changes replication factor for an update request (single or batch) by sending an optional parameter "min_rf". (Timothy Potter) +* SOLR-6015: Better way to handle managed synonyms when ignoreCase=true + (Timothy Potter) + ================== 4.9.0 ================== Versions of Major Components diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java index 8a4bcbcec3d..a8b1390039e 100644 --- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java @@ -57,6 +57,42 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { public static final String SYNONYM_MAPPINGS = "synonymMappings"; public static final String IGNORE_CASE_INIT_ARG = "ignoreCase"; + /** + * Used internally to preserve the case of synonym mappings regardless + * of the ignoreCase setting. + */ + private static class CasePreservedSynonymMappings { + Map> mappings = new TreeMap<>(); + + /** + * Provides a view of the mappings for a given term; specifically, if + * ignoreCase is true, then the returned "view" contains the mappings + * for all known cases of the term, if it is false, then only the + * mappings for the specific case is returned. + */ + Set getMappings(boolean ignoreCase, String key) { + Set synMappings = null; + if (ignoreCase) { + // TODO: should we return the mapped values in all lower-case here? + if (mappings.size() == 1) { + // if only one in the map (which is common) just return it directly + return mappings.values().iterator().next(); + } + + synMappings = new TreeSet<>(); + for (Set next : mappings.values()) + synMappings.addAll(next); + } else { + synMappings = mappings.get(key); + } + return synMappings; + } + + public String toString() { + return mappings.toString(); + } + } + /** * ManagedResource implementation for synonyms, which are so specialized that * it makes sense to implement this class as an inner class as it has little @@ -65,11 +101,7 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { public static class SynonymManager extends ManagedResource implements ManagedResource.ChildResourceSupport { - - // TODO: Maybe hold this using a SoftReference / WeakReference to - // reduce memory in case the set of synonyms is large and the JVM - // is running low on memory? - protected Map> synonymMappings; + protected Map synonymMappings; public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO) throws SolrException { @@ -94,11 +126,20 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) { initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE); } + boolean ignoreCase = getIgnoreCase(managedInitArgs); synonymMappings = new TreeMap<>(); if (managedData != null) { Map storedSyns = (Map)managedData; for (String key : storedSyns.keySet()) { + + String caseKey = applyCaseSetting(ignoreCase, key); + CasePreservedSynonymMappings cpsm = synonymMappings.get(caseKey); + if (cpsm == null) { + cpsm = new CasePreservedSynonymMappings(); + synonymMappings.put(caseKey, cpsm); + } + // give the nature of our JSON parsing solution, we really have // no guarantees on what is in the file Object mapping = storedSyns.get(key); @@ -108,21 +149,11 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { " but got "+mapping.getClass().getName()); } - // if we're configured to ignoreCase, then we build the mappings with all lower - List vals = (List)storedSyns.get(key); Set sortedVals = new TreeSet<>(); - if (ignoreCase) { - for (String next : vals) { - sortedVals.add(applyCaseSetting(ignoreCase, next)); - } - } else { - sortedVals.addAll(vals); - } - - synonymMappings.put(applyCaseSetting(ignoreCase, key), sortedVals); + sortedVals.addAll((List)storedSyns.get(key)); + cpsm.mappings.put(key, sortedVals); } } - log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId()); } @@ -138,17 +169,24 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { Map jsonMap = (Map)updates; for (String term : jsonMap.keySet()) { + String origTerm = term; term = applyCaseSetting(ignoreCase, term); - Set output = synonymMappings.get(term); + // find the mappings using the case aware key + CasePreservedSynonymMappings cpsm = synonymMappings.get(term); + if (cpsm == null) { + cpsm = new CasePreservedSynonymMappings(); + } - Object val = jsonMap.get(term); + Set output = cpsm.mappings.get(origTerm); + + Object val = jsonMap.get(origTerm); // IMPORTANT: use the original if (val instanceof String) { - String strVal = applyCaseSetting(ignoreCase, (String)val); + String strVal = (String)val; if (output == null) { output = new TreeSet<>(); - synonymMappings.put(term, output); + cpsm.mappings.put(origTerm, output); } if (output.add(strVal)) { @@ -159,11 +197,11 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { if (output == null) { output = new TreeSet<>(); - synonymMappings.put(term, output); + cpsm.mappings.put(origTerm, output); } for (String nextVal : vals) { - if (output.add(applyCaseSetting(ignoreCase, nextVal))) { + if (output.add(nextVal)) { madeChanges = true; } } @@ -172,41 +210,30 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+ " for "+term+"; expected single value or a JSON array!"); } + + // only add the cpsm to the synonymMappings if it has valid data + if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) { + synonymMappings.put(term, cpsm); + } } - - return madeChanges ? synonymMappings : null; + return madeChanges ? getStoredView() : null; } /** - * Handles a change in the ignoreCase setting for synonyms, which requires - * a full rebuild of the synonymMappings. + * Returns a Map of how we store and load data managed by this resource, + * which is different than how it is managed at runtime in order to support + * the ignoreCase setting. */ - @Override - protected boolean updateInitArgs(NamedList updatedArgs) { - if (updatedArgs == null || updatedArgs.size() == 0) { - return false; - } - boolean currentIgnoreCase = getIgnoreCase(managedInitArgs); - boolean updatedIgnoreCase = getIgnoreCase(updatedArgs); - if (currentIgnoreCase == true && updatedIgnoreCase == false) { - throw new SolrException(ErrorCode.BAD_REQUEST, - "Changing a managed word set's ignoreCase arg from true to false is not permitted."); - } else if (currentIgnoreCase == false && updatedIgnoreCase == true) { - // ignore case policy changed ... rebuild the map - Map> rebuild = new TreeMap<>(); - for (String curr : synonymMappings.keySet()) { - Set newMappings = new TreeSet<>(); - for (String next : synonymMappings.get(curr)) { - newMappings.add(applyCaseSetting(updatedIgnoreCase, next)); - } - rebuild.put(applyCaseSetting(updatedIgnoreCase, curr), newMappings); + protected Map> getStoredView() { + Map> storedView = new TreeMap<>(); + for (CasePreservedSynonymMappings cpsm : synonymMappings.values()) { + for (String key : cpsm.mappings.keySet()) { + storedView.put(key, cpsm.mappings.get(key)); } - synonymMappings = rebuild; } - - return super.updateInitArgs(updatedArgs); + return storedView; } - + protected String applyCaseSetting(boolean ignoreCase, String str) { return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str; } @@ -227,14 +254,19 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { if (childId != null) { boolean ignoreCase = getIgnoreCase(); String key = applyCaseSetting(ignoreCase, childId); - Set output = synonymMappings.get(key); - if (output == null) { + + // if ignoreCase==true, then we get the mappings using the lower-cased key + // and then return a union of all case-sensitive keys, if false, then + // we only return the mappings for the exact case requested + CasePreservedSynonymMappings cpsm = synonymMappings.get(key); + Set mappings = (cpsm != null) ? cpsm.getMappings(ignoreCase, childId) : null; + if (mappings == null) throw new SolrException(ErrorCode.NOT_FOUND, - String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); - } - response.add(key, output); + String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId())); + + response.add(childId, mappings); } else { - response.add(SYNONYM_MAPPINGS, buildMapToStore(synonymMappings)); + response.add(SYNONYM_MAPPINGS, buildMapToStore(getStoredView())); } } @@ -242,14 +274,32 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { boolean ignoreCase = getIgnoreCase(); String key = applyCaseSetting(ignoreCase, childId); - Set output = synonymMappings.get(key); - if (output == null) - throw new SolrException(ErrorCode.NOT_FOUND, - String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); - synonymMappings.remove(key); - storeManagedData(synonymMappings); - log.info("Removed synonym mappings for: {}", key); + CasePreservedSynonymMappings cpsm = synonymMappings.get(key); + if (cpsm == null) + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId())); + + if (ignoreCase) { + // delete all mappings regardless of case + synonymMappings.remove(key); + } else { + // just delete the mappings for the specific case-sensitive key + if (cpsm.mappings.containsKey(childId)) { + cpsm.mappings.remove(childId); + + if (cpsm.mappings.isEmpty()) + synonymMappings.remove(key); + } else { + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId())); + } + } + + // store the updated data (using the stored view) + storeManagedData(getStoredView()); + + log.info("Removed synonym mappings for: {}", childId); } } @@ -272,9 +322,15 @@ public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { */ @Override public void parse(Reader in) throws IOException, ParseException { - for (String term : synonymManager.synonymMappings.keySet()) { - for (String mapping : synonymManager.synonymMappings.get(term)) { - add(new CharsRef(term), new CharsRef(mapping), false); + boolean ignoreCase = synonymManager.getIgnoreCase(); + for (CasePreservedSynonymMappings cpsm : synonymManager.synonymMappings.values()) { + for (String term : cpsm.mappings.keySet()) { + for (String mapping : cpsm.mappings.get(term)) { + // apply the case setting to match the behavior of the SynonymMap builder + String casedTerm = synonymManager.applyCaseSetting(ignoreCase, term); + String casedMapping = synonymManager.applyCaseSetting(ignoreCase, mapping); + add(new CharsRef(casedTerm), new CharsRef(casedMapping), false); + } } } } diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java index 1c91ab9142e..deadac0bcbc 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java @@ -91,7 +91,7 @@ public class TestManagedSynonymFilterFactory extends RestTestBase { // request to a specific mapping assertJQ(endpoint+"/happy", "/happy==['cheerful','glad','joyful']"); - + // does not exist assertJQ(endpoint+"/sad", "/error/code==404"); @@ -106,13 +106,19 @@ public class TestManagedSynonymFilterFactory extends RestTestBase { syns = new HashMap<>(); syns.put("sad", Arrays.asList("unhappy")); - syns.put("SAD", Arrays.asList("Unhappy")); + syns.put("SAD", Arrays.asList("bummed")); assertJPut(endpoint, JSONUtil.toJSON(syns), "/responseHeader/status==0"); assertJQ(endpoint, "/synonymMappings/managedMap/sad==['unhappy']"); + assertJQ(endpoint, + "/synonymMappings/managedMap/SAD==['bummed']"); + + // expect a union of values when requesting the "sad" child + assertJQ(endpoint+"/sad", + "/sad==['bummed','unhappy']"); // verify delete works assertJDelete(endpoint+"/sad", @@ -174,5 +180,20 @@ public class TestManagedSynonymFilterFactory extends RestTestBase { "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", "/response/result[@name='response'][@numFound='1']", "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + + // test for SOLR-6015 + syns = new HashMap<>(); + syns.put("mb", Arrays.asList("megabyte")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + syns.put("MB", Arrays.asList("MiB", "Megabyte")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint+"/MB", + "/MB==['Megabyte','MiB','megabyte']"); } }