diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index 8df01e58fd1..7fcbf471c56 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -158,7 +158,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource /** * Load synonyms with the given {@link SynonymMap.Parser} class. */ - private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { + protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 786a900ebe4..6e5219fcf73 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -170,7 +170,9 @@ New Features * SOLR-5829: Allow ExpandComponent to accept query and filter query parameters (Joel Bernstein) - +* SOLR-5654: Create a synonym filter factory that is (re)configurable, and + capable of reporting its configuration, via REST API. + (Tim Potter via Steve Rowe) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java new file mode 100644 index 00000000000..8a4bcbcec3d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java @@ -0,0 +1,349 @@ +package org.apache.solr.rest.schema.analysis; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.text.ParseException; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.synonym.SynonymFilterFactory; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.util.CharsRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceStorage.StorageIO; +import org.restlet.data.Status; +import org.restlet.resource.ResourceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * TokenFilterFactory and ManagedResource implementation for + * doing CRUD on synonyms using the REST API. + */ +public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { + + public static final Logger log = LoggerFactory.getLogger(ManagedSynonymFilterFactory.class); + + public static final String SYNONYM_MAPPINGS = "synonymMappings"; + public static final String IGNORE_CASE_INIT_ARG = "ignoreCase"; + + /** + * ManagedResource implementation for synonyms, which are so specialized that + * it makes sense to implement this class as an inner class as it has little + * application outside the SynonymFilterFactory use cases. + */ + public static class SynonymManager extends ManagedResource + implements ManagedResource.ChildResourceSupport + { + + // TODO: Maybe hold this using a SoftReference / WeakReference to + // reduce memory in case the set of synonyms is large and the JVM + // is running low on memory? + protected Map> synonymMappings; + + public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + } + + @SuppressWarnings("unchecked") + @Override + protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData) + throws SolrException + { + NamedList initArgs = (NamedList)managedInitArgs; + + String format = (String)initArgs.get("format"); + if (format != null && !"solr".equals(format)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid format "+ + format+"! Only 'solr' is supported."); + } + + // the default behavior is to not ignore case, + // so if not supplied, then install the default + if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) { + initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE); + } + boolean ignoreCase = getIgnoreCase(managedInitArgs); + synonymMappings = new TreeMap<>(); + if (managedData != null) { + Map storedSyns = (Map)managedData; + for (String key : storedSyns.keySet()) { + // give the nature of our JSON parsing solution, we really have + // no guarantees on what is in the file + Object mapping = storedSyns.get(key); + if (!(mapping instanceof List)) { + throw new SolrException(ErrorCode.SERVER_ERROR, + "Invalid synonym file format! Expected a list of synonyms for "+key+ + " but got "+mapping.getClass().getName()); + } + + // if we're configured to ignoreCase, then we build the mappings with all lower + List vals = (List)storedSyns.get(key); + Set sortedVals = new TreeSet<>(); + if (ignoreCase) { + for (String next : vals) { + sortedVals.add(applyCaseSetting(ignoreCase, next)); + } + } else { + sortedVals.addAll(vals); + } + + synonymMappings.put(applyCaseSetting(ignoreCase, key), sortedVals); + } + } + + log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId()); + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (!(updates instanceof Map)) { + throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, + "Unsupported data format (" + updates.getClass().getName() + "); expected a JSON object (Map)!"); + } + boolean ignoreCase = getIgnoreCase(); + boolean madeChanges = false; + Map jsonMap = (Map)updates; + for (String term : jsonMap.keySet()) { + + term = applyCaseSetting(ignoreCase, term); + + Set output = synonymMappings.get(term); + + Object val = jsonMap.get(term); + if (val instanceof String) { + String strVal = applyCaseSetting(ignoreCase, (String)val); + + if (output == null) { + output = new TreeSet<>(); + synonymMappings.put(term, output); + } + + if (output.add(strVal)) { + madeChanges = true; + } + } else if (val instanceof List) { + List vals = (List)val; + + if (output == null) { + output = new TreeSet<>(); + synonymMappings.put(term, output); + } + + for (String nextVal : vals) { + if (output.add(applyCaseSetting(ignoreCase, nextVal))) { + madeChanges = true; + } + } + + } else { + throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+ + " for "+term+"; expected single value or a JSON array!"); + } + } + + return madeChanges ? synonymMappings : null; + } + + /** + * Handles a change in the ignoreCase setting for synonyms, which requires + * a full rebuild of the synonymMappings. + */ + @Override + protected boolean updateInitArgs(NamedList updatedArgs) { + if (updatedArgs == null || updatedArgs.size() == 0) { + return false; + } + boolean currentIgnoreCase = getIgnoreCase(managedInitArgs); + boolean updatedIgnoreCase = getIgnoreCase(updatedArgs); + if (currentIgnoreCase == true && updatedIgnoreCase == false) { + throw new SolrException(ErrorCode.BAD_REQUEST, + "Changing a managed word set's ignoreCase arg from true to false is not permitted."); + } else if (currentIgnoreCase == false && updatedIgnoreCase == true) { + // ignore case policy changed ... rebuild the map + Map> rebuild = new TreeMap<>(); + for (String curr : synonymMappings.keySet()) { + Set newMappings = new TreeSet<>(); + for (String next : synonymMappings.get(curr)) { + newMappings.add(applyCaseSetting(updatedIgnoreCase, next)); + } + rebuild.put(applyCaseSetting(updatedIgnoreCase, curr), newMappings); + } + synonymMappings = rebuild; + } + + return super.updateInitArgs(updatedArgs); + } + + protected String applyCaseSetting(boolean ignoreCase, String str) { + return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str; + } + + public boolean getIgnoreCase() { + return getIgnoreCase(managedInitArgs); + } + + public boolean getIgnoreCase(NamedList initArgs) { + Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG); + // ignoreCase = false by default + return null == ignoreCase ? false : ignoreCase; + } + + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + SolrQueryResponse response = endpoint.getSolrResponse(); + if (childId != null) { + boolean ignoreCase = getIgnoreCase(); + String key = applyCaseSetting(ignoreCase, childId); + Set output = synonymMappings.get(key); + if (output == null) { + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); + } + response.add(key, output); + } else { + response.add(SYNONYM_MAPPINGS, buildMapToStore(synonymMappings)); + } + } + + @Override + public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { + boolean ignoreCase = getIgnoreCase(); + String key = applyCaseSetting(ignoreCase, childId); + Set output = synonymMappings.get(key); + if (output == null) + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); + + synonymMappings.remove(key); + storeManagedData(synonymMappings); + log.info("Removed synonym mappings for: {}", key); + } + } + + /** + * Custom SynonymMap.Parser implementation that provides synonym + * mappings from the managed JSON in this class during SynonymMap + * building. + */ + private class ManagedSynonymParser extends SynonymMap.Parser { + + SynonymManager synonymManager; + + public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) { + super(dedup, analyzer); + this.synonymManager = synonymManager; + } + + /** + * Add the managed synonyms and their mappings into the SynonymMap builder. + */ + @Override + public void parse(Reader in) throws IOException, ParseException { + for (String term : synonymManager.synonymMappings.keySet()) { + for (String mapping : synonymManager.synonymMappings.get(term)) { + add(new CharsRef(term), new CharsRef(mapping), false); + } + } + } + } + + protected SynonymFilterFactory delegate; + + public ManagedSynonymFilterFactory(Map args) { + super(args); + } + + @Override + public String getResourceId() { + return "/schema/analysis/synonyms/"+handle; + } + + protected Class getManagedResourceImplClass() { + return SynonymManager.class; + } + + /** + * Called once, during core initialization, to initialize any analysis components + * that depend on the data managed by this resource. It is important that the + * analysis component is only initialized once during core initialization so that + * text analysis is consistent, especially in a distributed environment, as we + * don't want one server applying a different set of stop words than other servers. + */ + @SuppressWarnings("unchecked") + @Override + public void onManagedResourceInitialized(NamedList initArgs, final ManagedResource res) + throws SolrException + { + NamedList args = (NamedList)initArgs; + args.add("synonyms", getResourceId()); + args.add("expand", "false"); + args.add("format", "solr"); + + Map filtArgs = new HashMap<>(); + for (Map.Entry entry : args) { + filtArgs.put(entry.getKey(), entry.getValue().toString()); + } + // create the actual filter factory that pulls the synonym mappings + // from synonymMappings using a custom parser implementation + delegate = new SynonymFilterFactory(filtArgs) { + @Override + protected SynonymMap loadSynonyms + (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) + throws IOException, ParseException { + + ManagedSynonymParser parser = + new ManagedSynonymParser((SynonymManager)res, dedup, analyzer); + // null is safe here because there's no actual parsing done against a input Reader + parser.parse(null); + return parser.build(); + } + }; + try { + delegate.inform(res.getResourceLoader()); + } catch (IOException e) { + throw new SolrException(ErrorCode.SERVER_ERROR, e); + } + } + + @Override + public TokenStream create(TokenStream input) { + if (delegate == null) + throw new IllegalStateException(this.getClass().getName()+ + " not initialized correctly! The SynonymFilterFactory delegate was not initialized."); + + return delegate.create(input); + } +} diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml index 60281903716..ece1a8e9642 100755 --- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml @@ -457,6 +457,7 @@ + diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java new file mode 100644 index 00000000000..10ea732bce1 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java @@ -0,0 +1,179 @@ +package org.apache.solr.rest.schema.analysis; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.util.RestTestBase; +import org.eclipse.jetty.servlet.ServletHolder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.noggit.JSONUtil; +import org.restlet.ext.servlet.ServerServlet; + +public class TestManagedSynonymFilterFactory extends RestTestBase { + + private static File tmpSolrHome; + + /** + * Setup to make the schema mutable + */ + @Before + public void before() throws Exception { + tmpSolrHome = new File(dataDir + File.separator + TestManagedStopFilterFactory.class.getSimpleName() + + System.currentTimeMillis()); + FileUtils.copyDirectory(new File(TEST_HOME()), tmpSolrHome.getAbsoluteFile()); + + final SortedMap extraServlets = new TreeMap<>(); + final ServletHolder solrRestApi = new ServletHolder("SolrSchemaRestApi", ServerServlet.class); + solrRestApi.setInitParameter("org.restlet.application", "org.apache.solr.rest.SolrSchemaRestApi"); + extraServlets.put(solrRestApi, "/schema/*"); + + System.setProperty("managed.schema.mutable", "true"); + System.setProperty("enable.update.log", "false"); + createJettyAndHarness(tmpSolrHome.getAbsolutePath(), "solrconfig-managed-schema.xml", "schema-rest.xml", + "/solr", true, extraServlets); + } + + @After + private void after() throws Exception { + jetty.stop(); + jetty = null; + FileUtils.deleteDirectory(tmpSolrHome); + System.clearProperty("managed.schema.mutable"); + System.clearProperty("enable.update.log"); + } + + @Test + public void testManagedSynonyms() throws Exception { + // this endpoint depends on at least one field type containing the following + // declaration in the schema-rest.xml: + // + // + // + String endpoint = "/schema/analysis/synonyms/english"; + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==false", + "/synonymMappings/managedMap=={}"); + + // put a new mapping into the synonyms + Map> syns = new HashMap<>(); + syns.put("happy", Arrays.asList("glad","cheerful","joyful")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/happy==['cheerful','glad','joyful']"); + + // request to a specific mapping + assertJQ(endpoint+"/happy", + "/happy==['cheerful','glad','joyful']"); + + // does not exist + assertJQ(endpoint+"/sad", + "/error/code==404"); + + // verify the user can update the ignoreCase initArg + assertJPut(endpoint, + json("{ 'initArgs':{ 'ignoreCase':true } }"), + "responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==true"); + + syns = new HashMap<>(); + syns.put("sad", Arrays.asList("unhappy")); + syns.put("SAD", Arrays.asList("Unhappy")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/sad==['unhappy']"); + + // verify delete works + assertJDelete(endpoint+"/sad", + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap=={'happy':['cheerful','glad','joyful']}"); + + // should fail with 404 as foo doesn't exist + assertJDelete(endpoint+"/foo", + "/error/code==404"); + + // verify that a newly added synonym gets expanded on the query side after core reload + + String newFieldName = "managed_en_field"; + // make sure the new field doesn't already exist + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 0", + "/response/lst[@name='responseHeader']/int[@name='status'] = '404'", + "/response/lst[@name='error']/int[@name='code'] = '404'"); + + // add the new field + assertJPut("/schema/fields/" + newFieldName, json("{'type':'managed_en'}"), + "/responseHeader/status==0"); + + // make sure the new field exists now + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 1", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'"); + + assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150")); + assertU(commit()); + + assertQ("/select?q=" + newFieldName + ":angry", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + + // add a mapping that will expand a query for "mad" to match docs with "angry" + syns = new HashMap<>(); + syns.put("mad", Arrays.asList("angry")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/mad==['angry']"); + + // should not match as the synonym mapping between mad and angry does not + // get applied until core reload + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='0']"); + + restTestHarness.reload(); + + // now query for mad and we should see our test doc + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + } +} diff --git a/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json b/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json new file mode 100644 index 00000000000..869bdce0514 --- /dev/null +++ b/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json @@ -0,0 +1,11 @@ +{ + "initArgs":{ + "ignoreCase":true, + "format":"solr" + }, + "managedMap":{ + "GB":["GiB","Gigabyte"], + "happy":["glad","joyful"], + "TV":["Television"] + } +} diff --git a/solr/example/solr/collection1/conf/schema.xml b/solr/example/solr/collection1/conf/schema.xml index adaedfdbf8a..5504a0f00f1 100755 --- a/solr/example/solr/collection1/conf/schema.xml +++ b/solr/example/solr/collection1/conf/schema.xml @@ -453,6 +453,7 @@ +