SOLR-5167: Ability to use AnalyzingInfixSuggester in Solr

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1523451 13f79535-47bb-0310-9956-ffa450edef68
2013-09-15 16:13:32 +00:00 · 2013-09-15 16:13:32 +00:00 · 11a2ca8959
parent 67358330e1
commit 11a2ca8959
5 changed files with 206 additions and 1 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -65,6 +65,12 @@ Apache ZooKeeper 3.4.5
 Detailed Change List
 ----------------------

+New Features
+----------------------
+
+* SOLR-5167: Add support for AnalyzingInfixSuggester (AnalyzingInfixLookupFactory).
+  (Areek Zillur, Varun Thacker via Robert Muir)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@ -0,0 +1,97 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link AnalyzingInfixSuggester}
+ * @lucene.experimental
+ */
+public class AnalyzingInfixLookupFactory extends LookupFactory {
+  /**
+   * The analyzer used at "query-time" and "build-time" to analyze suggestions.
+   */
+  public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
+
+  /**
+   * The path where the underlying index is stored
+   * if no index is found, it will be generated by
+   * the AnalyzingInfixSuggester
+   */
+  public static final String INDEX_PATH = "indexPath";
+
+  /**
+   * Minimum number of leading characters before PrefixQuery is used (default 4). 
+   * Prefixes shorter than this are indexed as character ngrams 
+   * (increasing index size but making lookups faster)
+   */
+  private static final String MIN_PREFIX_CHARS = "minPrefixChars";
+  
+  private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir";
+
+  /**
+   * File name for the automaton.
+   */
+  private static final String FILENAME = "iwfsta.bin";
+  
+  
+  @Override
+  public Lookup create(NamedList params, SolrCore core) {
+    // mandatory parameter
+    Object fieldTypeName = params.get(QUERY_ANALYZER);
+    if (fieldTypeName == null) {
+      throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
+    }
+    FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
+    Analyzer indexAnalyzer = ft.getAnalyzer();
+    Analyzer queryAnalyzer = ft.getQueryAnalyzer();
+    
+    // optional parameters
+    
+    String indexPath = params.get(INDEX_PATH) != null
+    ? params.get(INDEX_PATH).toString()
+    : DEFAULT_INDEX_PATH;
+    
+    int minPrefixChars = params.get(MIN_PREFIX_CHARS) != null
+    ? Integer.parseInt(params.get(MIN_PREFIX_CHARS).toString())
+    : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
+
+    try {
+      return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion, 
+          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
+    } catch (IOException e) {
+      throw new RuntimeException();
+    }
+  }
+
+  @Override
+  public String storeFileName() {
+    return FILENAME;
+  }
+}
--- a/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
+++ b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
@ -0,0 +1,5 @@
+# simple AnalyzingInfix suggest phrase dictionary for testing
+Japanese Autocomplete and Japanese Highlighter broken
+Add Japanese Kanji number normalization to Kuromoji
+Add decompose compound Japanese Katakana token capability to Kuromoji
+This is just another entry!
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
@ -65,6 +65,24 @@
    <str name="queryAnalyzerFieldType">phrase_suggest</str>
  </searchComponent>
  
+  <!-- AnalyzingInfixLookup suggest component (default)-->
+  <searchComponent class="solr.SpellCheckComponent" name="infix_suggest_analyzing">
+    <lst name="spellchecker">
+      <str name="name">infix_suggest_analyzing</str>
+      <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+      <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory</str>
+      <str name="buildOnCommit">false</str>
+
+      <!-- Suggester properties -->
+      <str name="suggestAnalyzerFieldType">text</str>
+      
+      <str name="sourceLocation">analyzingInfixSuggest.txt</str>
+    </lst>
+    
+    <!-- specify a fieldtype using keywordtokenizer + lowercase + cleanup -->
+    <str name="queryAnalyzerFieldType">phrase_suggest</str>
+  </searchComponent>
+
  <!-- FuzzyLookup suggest component (default)-->
  <searchComponent class="solr.SpellCheckComponent" name="fuzzy_suggest_analyzing">
    <lst name="spellchecker">
@ -183,7 +201,20 @@
    </arr>
  </requestHandler>
  
-  <!--  Fuzzy analyzing handler with 1 max edit -->
+  <!--  Infix analyzing handler (default) -->
+  <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/infix_suggest_analyzing">
+    <lst name="defaults">
+      <str name="spellcheck">true</str>
+      <str name="spellcheck.dictionary">infix_suggest_analyzing</str>
+      <str name="spellcheck.collate">false</str>
+      <!-- NOTE: if this is false, results are alpha-ordered, not by weight! -->
+      <str name="spellcheck.onlyMorePopular">true</str>
+    </lst>
+    <arr name="components">
+      <str>infix_suggest_analyzing</str>
+    </arr>
+  </requestHandler>
+
  <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing">
    <lst name="defaults">
      <str name="spellcheck">true</str>
--- a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
+++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
@ -0,0 +1,66 @@
+package org.apache.solr.spelling.suggest;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.SpellingParams;
+import org.junit.BeforeClass;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4  {
+  static final String URI_DEFAULT = "/infix_suggest_analyzing";
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
+    assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true"));
+  }
+  
+  public void testSingle() throws Exception {
+    
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "1"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='1']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']"
+      );
+    
+    assertQ(req("qt", URI_DEFAULT, "q", "high", SpellingParams.SPELLCHECK_COUNT, "1"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/int[@name='numFound'][.='1']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese <b>High</b>lighter broken']"
+      );
+  }
+  
+  public void testMultiple() throws Exception {
+    
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "2"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='2']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']"
+      );
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "3"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
+      );
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "4"),
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
+      "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
+      );
+  }
+}