diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 71bc9d6180d..f0779b2f309 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -65,6 +65,12 @@ Apache ZooKeeper 3.4.5 Detailed Change List ---------------------- +New Features +---------------------- + +* SOLR-5167: Add support for AnalyzingInfixSuggester (AnalyzingInfixLookupFactory). + (Areek Zillur, Varun Thacker via Robert Muir) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java new file mode 100644 index 00000000000..e32859eebc0 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java @@ -0,0 +1,97 @@ +package org.apache.solr.spelling.suggest.fst; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; +import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.schema.FieldType; +import org.apache.solr.spelling.suggest.LookupFactory; + +/** + * Factory for {@link AnalyzingInfixSuggester} + * @lucene.experimental + */ +public class AnalyzingInfixLookupFactory extends LookupFactory { + /** + * The analyzer used at "query-time" and "build-time" to analyze suggestions. + */ + public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType"; + + /** + * The path where the underlying index is stored + * if no index is found, it will be generated by + * the AnalyzingInfixSuggester + */ + public static final String INDEX_PATH = "indexPath"; + + /** + * Minimum number of leading characters before PrefixQuery is used (default 4). + * Prefixes shorter than this are indexed as character ngrams + * (increasing index size but making lookups faster) + */ + private static final String MIN_PREFIX_CHARS = "minPrefixChars"; + + private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir"; + + /** + * File name for the automaton. + */ + private static final String FILENAME = "iwfsta.bin"; + + + @Override + public Lookup create(NamedList params, SolrCore core) { + // mandatory parameter + Object fieldTypeName = params.get(QUERY_ANALYZER); + if (fieldTypeName == null) { + throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); + } + FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); + Analyzer indexAnalyzer = ft.getAnalyzer(); + Analyzer queryAnalyzer = ft.getQueryAnalyzer(); + + // optional parameters + + String indexPath = params.get(INDEX_PATH) != null + ? params.get(INDEX_PATH).toString() + : DEFAULT_INDEX_PATH; + + int minPrefixChars = params.get(MIN_PREFIX_CHARS) != null + ? Integer.parseInt(params.get(MIN_PREFIX_CHARS).toString()) + : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS; + + try { + return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion, + new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars); + } catch (IOException e) { + throw new RuntimeException(); + } + } + + @Override + public String storeFileName() { + return FILENAME; + } +} diff --git a/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt new file mode 100644 index 00000000000..6d276c33a16 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt @@ -0,0 +1,5 @@ +# simple AnalyzingInfix suggest phrase dictionary for testing +Japanese Autocomplete and Japanese Highlighter broken +Add Japanese Kanji number normalization to Kuromoji +Add decompose compound Japanese Katakana token capability to Kuromoji +This is just another entry! \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml index 96b4f7b52f4..b4f560ed32f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml @@ -65,6 +65,24 @@ phrase_suggest + + + + infix_suggest_analyzing + org.apache.solr.spelling.suggest.Suggester + org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory + false + + + text + + analyzingInfixSuggest.txt + + + + phrase_suggest + + @@ -183,7 +201,20 @@ - + + + + true + infix_suggest_analyzing + false + + true + + + infix_suggest_analyzing + + + true diff --git a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java new file mode 100644 index 00000000000..0ee3e583356 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java @@ -0,0 +1,66 @@ +package org.apache.solr.spelling.suggest; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.SpellingParams; +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 { + static final String URI_DEFAULT = "/infix_suggest_analyzing"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml"); + assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true")); + } + + public void testSingle() throws Exception { + + assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "1"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='1']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese Highlighter broken']" + ); + + assertQ(req("qt", URI_DEFAULT, "q", "high", SpellingParams.SPELLCHECK_COUNT, "1"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/int[@name='numFound'][.='1']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese Highlighter broken']" + ); + } + + public void testMultiple() throws Exception { + + assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "2"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='2']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese Highlighter broken']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add Japanese Kanji number normalization to Kuromoji']" + ); + assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "3"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese Highlighter broken']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add Japanese Kanji number normalization to Kuromoji']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound Japanese Katakana token capability to Kuromoji']" + ); + assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "4"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese Highlighter broken']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add Japanese Kanji number normalization to Kuromoji']", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound Japanese Katakana token capability to Kuromoji']" + ); + } +} \ No newline at end of file