SOLR-5631: Add support for FreeTextSuggester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1558635 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-01-16 01:22:28 +00:00
parent 5945077918
commit 0461883e2e
5 changed files with 176 additions and 0 deletions

View File

@ -145,6 +145,9 @@ New Features
* SOLR-5529: Add support for queries to use multiple suggesters.
(Areek Zillur, Erick Erickson, via Robert Muir)
* SOLR-5631: Add support for Lucene's FreeTextSuggester.
(Areek Zillur via Robert Muir)
Bug Fixes
----------------------

View File

@ -0,0 +1,85 @@
package org.apache.solr.spelling.suggest.fst;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.spelling.suggest.LookupFactory;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* LookupFactory implementation for {@link FreeTextSuggester}
* */
public class FreeTextLookupFactory extends LookupFactory {
/**
* The analyzer used at "query-time" and "build-time" to analyze suggestions.
*/
public static final String QUERY_ANALYZER = "suggestFreeTextAnalyzerFieldType";
/**
* The n-gram model to use in the underlying suggester; Default value is 2.
* */
public static final String NGRAMS = "ngrams";
/**
* The separator to use in the underlying suggester;
* */
public static final String SEPARATOR = "separator";
/**
* File name for the automaton.
*/
private static final String FILENAME = "ftsta.bin";
@Override
public Lookup create(NamedList params, SolrCore core) {
Object fieldTypeName = params.get(QUERY_ANALYZER);
if (fieldTypeName == null) {
throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
}
FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
if (ft == null) {
throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
}
Analyzer indexAnalyzer = ft.getAnalyzer();
Analyzer queryAnalyzer = ft.getQueryAnalyzer();
int grams = (params.get(NGRAMS) != null)
? Integer.parseInt(params.get(NGRAMS).toString())
: FreeTextSuggester.DEFAULT_GRAMS;
byte separator = (params.get(SEPARATOR) != null)
? params.get(SEPARATOR).toString().getBytes(IOUtils.CHARSET_UTF_8)[0]
: FreeTextSuggester.DEFAULT_SEPARATOR;
return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator);
}
@Override
public String storeFileName() {
return FILENAME;
}
}

View File

@ -0,0 +1,2 @@
foo bar baz blah
boo foo bar foo bee

View File

@ -102,6 +102,22 @@
</searchComponent>
<searchComponent class="solr.SuggestComponent" name="free_text_suggest">
<lst name="suggester">
<str name="name">free_text_suggest</str>
<str name="lookupImpl">FreeTextLookupFactory</str>
<str name="dictionaryImpl">FileDictionaryFactory</str>
<str name="storeDir">free_text_suggest</str>
<str name="buildOnCommit">false</str>
<str name="sourceLocation">freeTextSuggest.txt</str>
<!-- Suggester properties -->
<str name="separator"> </str>
<str name="suggestFreeTextAnalyzerFieldType">text</str>
<int name="ngrams">2</int>
</lst>
</searchComponent>
<!-- FuzzyLookup suggest component with FileDictionaryFactory -->
<searchComponent class="solr.SuggestComponent" name="fuzzy_suggest_analyzing_with_file_dict">
<lst name="suggester">
@ -259,6 +275,17 @@
<str>fuzzy_suggest_analyzing_with_file_dict</str>
</arr>
</requestHandler>
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/free_text_suggest">
<lst name="defaults">
<str name="suggest">true</str>
</lst>
<arr name="components">
<str>free_text_suggest</str>
</arr>
</requestHandler>
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing_with_high_freq_dict">
<lst name="defaults">

View File

@ -0,0 +1,59 @@
package org.apache.solr.spelling.suggest;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SpellingParams;
import org.junit.BeforeClass;
public class TestFreeTextSuggestions extends SolrTestCaseJ4 {
static final String URI = "/free_text_suggest";
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
assertQ(req("qt", URI, "q", "", SuggesterParams.SUGGEST_BUILD_ALL, "true"));
}
public void test() {
assertQ(req("qt", URI, "q", "foo b", SuggesterParams.SUGGEST_COUNT, "1", SuggesterParams.SUGGEST_DICT, "free_text_suggest"),
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo b']/int[@name='numFound'][.='1']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo b']/arr[@name='suggestions']/lst[1]/str[@name='term'][.='foo bar']"
);
assertQ(req("qt", URI, "q", "foo ", SuggesterParams.SUGGEST_COUNT, "2", SuggesterParams.SUGGEST_DICT, "free_text_suggest"),
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo ']/int[@name='numFound'][.='2']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo ']/arr[@name='suggestions']/lst[1]/str[@name='term'][.='foo bar']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo ']/arr[@name='suggestions']/lst[2]/str[@name='term'][.='foo bee']"
);
assertQ(req("qt", URI, "q", "foo", SuggesterParams.SUGGEST_COUNT, "2", SuggesterParams.SUGGEST_DICT, "free_text_suggest"),
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo']/int[@name='numFound'][.='1']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='foo']/arr[@name='suggestions']/lst[1]/str[@name='term'][.='foo']"
);
assertQ(req("qt", URI, "q", "b", SuggesterParams.SUGGEST_COUNT, "5", SuggesterParams.SUGGEST_DICT, "free_text_suggest"),
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/int[@name='numFound'][.='5']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/arr[@name='suggestions']/lst[1]/str[@name='term'][.='bar']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/arr[@name='suggestions']/lst[2]/str[@name='term'][.='baz']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/arr[@name='suggestions']/lst[3]/str[@name='term'][.='bee']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/arr[@name='suggestions']/lst[4]/str[@name='term'][.='blah']",
"//lst[@name='suggest']/lst[@name='free_text_suggest']/lst[@name='b']/arr[@name='suggestions']/lst[5]/str[@name='term'][.='boo']"
);
}
}