diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 21160aec499..89bf8b3229b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -143,6 +143,9 @@ New Features
* SOLR-5631: Add support for Lucene's FreeTextSuggester.
(Areek Zillur via Robert Muir)
+* SOLR-5695: Add support for Lucene's BlendedInfixSuggester.
+ (Areek Zillur)
+
* SOLR-1301: Add a Solr contrib that allows for building Solr indexes via
Hadoop's MapReduce. (Matt Revelle, Alexander Kanarsky, Steve Rowe,
Mark Miller, Greg Bowyer, Jason Rutherglen, Kris Jirapinyo, Jason Venner ,
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
index e8196413cc6..f09c089d743 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@@ -37,22 +37,25 @@ public class AnalyzingInfixLookupFactory extends LookupFactory {
/**
* The analyzer used at "query-time" and "build-time" to analyze suggestions.
*/
- public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
+ protected static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
/**
* The path where the underlying index is stored
* if no index is found, it will be generated by
* the AnalyzingInfixSuggester
*/
- public static final String INDEX_PATH = "indexPath";
+ protected static final String INDEX_PATH = "indexPath";
/**
* Minimum number of leading characters before PrefixQuery is used (default 4).
* Prefixes shorter than this are indexed as character ngrams
* (increasing index size but making lookups faster)
*/
- private static final String MIN_PREFIX_CHARS = "minPrefixChars";
+ protected static final String MIN_PREFIX_CHARS = "minPrefixChars";
+ /**
+ * Default path where the index for the suggester is stored/loaded from
+ */
private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir";
/**
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
new file mode 100644
index 00000000000..1662913c694
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
@@ -0,0 +1,118 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
+import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
+import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester.BlenderType;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
+
+/**
+ * Factory for {@link BlendedInfixSuggester}
+ * @lucene.experimental
+ */
+public class BlendedInfixLookupFactory extends AnalyzingInfixLookupFactory {
+
+  /**
+   * Blender type used to calculate weight coefficient using the position
+   * of the first matching word
+   *
+   * Available blender types are:
+   *  linear: weight*(1 - 0.10*position) [default]
+   *  reciprocal: weight/(1+position)
+   */
+  private static final String BLENDER_TYPE = "blenderType";
+
+  /**
+   * Factor to multiply the number of searched elements
+   * Default is {@link BlendedInfixSuggester#DEFAULT_NUM_FACTOR}
+   */
+  private static final String NUM_FACTOR = "numFactor";
+
+  /** Default path where the index for the suggester is stored/loaded from */
+  private static final String DEFAULT_INDEX_PATH = "blendedInfixSuggesterIndexDir";
+
+  /** File name for the automaton. */
+  private static final String FILENAME = "bifsta.bin";
+
+  /**
+   * Builds a {@link BlendedInfixSuggester} from the suggester configuration.
+   * @param params suggester configuration; must contain the analyzer field type
+   * @param core core supplying the schema and the Lucene match version
+   * @return the configured suggester
+   * @throws IllegalArgumentException if the analyzer field type is missing or unknown
+   * @throws RuntimeException if constructing the suggester fails with an IOException
+   */
+  @Override
+  public Lookup create(NamedList params, SolrCore core) {
+    // mandatory parameter
+    Object fieldTypeName = params.get(QUERY_ANALYZER);
+    if (fieldTypeName == null) {
+      throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
+    }
+    FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
+    if (ft == null) {
+      throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
+    }
+    Analyzer indexAnalyzer = ft.getAnalyzer();
+    Analyzer queryAnalyzer = ft.getQueryAnalyzer();
+
+    // optional parameters
+    String indexPath = params.get(INDEX_PATH) != null
+        ? params.get(INDEX_PATH).toString()
+        : DEFAULT_INDEX_PATH;
+    int minPrefixChars = params.get(MIN_PREFIX_CHARS) != null
+        ? Integer.parseInt(params.get(MIN_PREFIX_CHARS).toString())
+        : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
+    BlenderType blenderType = getBlenderType(params.get(BLENDER_TYPE));
+    int numFactor = params.get(NUM_FACTOR) != null
+        ? Integer.parseInt(params.get(NUM_FACTOR).toString())
+        : BlendedInfixSuggester.DEFAULT_NUM_FACTOR;
+
+    try {
+      return new BlendedInfixSuggester(core.getSolrConfig().luceneMatchVersion,
+          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars, blenderType, numFactor);
+    } catch (IOException e) {
+      throw new RuntimeException("Unable to create BlendedInfixSuggester at index path " + indexPath, e);
+    }
+  }
+
+  /** Returns the store file name, {@code bifsta.bin}. */
+  @Override
+  public String storeFileName() {
+    return FILENAME;
+  }
+
+  /**
+   * Maps the configured value to a {@link BlenderType}: "reciprocal" (any case)
+   * selects POSITION_RECIPROCAL; null or any other value selects POSITION_LINEAR.
+   */
+  private BlenderType getBlenderType(Object blenderTypeParam) {
+    if (blenderTypeParam != null && "reciprocal".equalsIgnoreCase(blenderTypeParam.toString())) {
+      return BlenderType.POSITION_RECIPROCAL;
+    }
+    return BlenderType.POSITION_LINEAR;
+  }
+}
diff --git a/solr/core/src/test-files/solr/collection1/conf/blendedInfixSuggest.txt b/solr/core/src/test-files/solr/collection1/conf/blendedInfixSuggest.txt
new file mode 100644
index 00000000000..c3b3d340bb7
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/blendedInfixSuggest.txt
@@ -0,0 +1,3 @@
+top of the lake 18 lake
+star wars: episode v - the empire strikes back 12 star
+the returned 10 ret
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
index 60033d9f755..74a27596e22 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
@@ -118,6 +118,34 @@
+
+
+
+ blended_infix_suggest_linear
+ BlendedInfixLookupFactory
+ FileDictionaryFactory
+ false
+ blendedInfixSuggest.txt
+
+
+ linear
+ text
+
+
+
+ blended_infix_suggest_reciprocal
+ BlendedInfixLookupFactory
+ FileDictionaryFactory
+ false
+ blendedInfixSuggest.txt
+
+
+ reciprocal
+ text
+
+
+
+
@@ -277,6 +305,15 @@
+
+
+ true
+
+
+ blended_infix_suggest
+
+
+
true
diff --git a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
index 0ee3e583356..0e076ef3268 100644
--- a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
+++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
@@ -1,7 +1,10 @@
package org.apache.solr.spelling.suggest;
+import java.io.File;
+
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SpellingParams;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
/*
@@ -30,6 +33,16 @@ public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 {
assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true"));
}
+  /** Removes the suggester index directory and its ".tmp" sibling, when present. */
+  @AfterClass
+  public static void afterClass() throws Exception {
+    for (String name : new String[] {"analyzingInfixSuggesterIndexDir", "analyzingInfixSuggesterIndexDir.tmp"}) {
+      File dir = new File(name);
+      // a missing directory is fine; an existing one must be deleted successfully
+      assertTrue(!dir.exists() || recurseDelete(dir));
+    }
+  }
+
public void testSingle() throws Exception {
assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "1"),
@@ -63,4 +76,5 @@ public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 {
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound Japanese Katakana token capability to Kuromoji']"
);
}
+
}
\ No newline at end of file
diff --git a/solr/core/src/test/org/apache/solr/spelling/suggest/TestBlendedInfixSuggestions.java b/solr/core/src/test/org/apache/solr/spelling/suggest/TestBlendedInfixSuggestions.java
new file mode 100644
index 00000000000..ee2c93cccc0
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestBlendedInfixSuggestions.java
@@ -0,0 +1,101 @@
+package org.apache.solr.spelling.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * Exercises the suggesters built by {@code BlendedInfixLookupFactory} with the
+ * "linear" and "reciprocal" blender types.
+ */
+public class TestBlendedInfixSuggestions extends SolrTestCaseJ4 {
+  static final String URI = "/blended_infix_suggest";
+
+  // suggester (dictionary) names passed as SuggesterParams.SUGGEST_DICT
+  private static final String LINEAR_DICT = "blended_infix_suggest_linear";
+  private static final String RECIPROCAL_DICT = "blended_infix_suggest_reciprocal";
+
+  // expected {term, weight, payload} rows for the query "the", in response order
+  private static final String[][] LINEAR_ROWS = {
+    {"top of the lake", "14", "lake"},
+    {"the returned", "10", "ret"},
+    {"star wars: episode v - the empire strikes back", "7", "star"}
+  };
+  private static final String[][] RECIPROCAL_ROWS = {
+    {"the returned", "10", "ret"},
+    {"top of the lake", "6", "lake"},
+    {"star wars: episode v - the empire strikes back", "2", "star"}
+  };
+
+  /** Loads the phrase-suggest test core and requests a build with SUGGEST_BUILD_ALL. */
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
+    assertQ(req("qt", URI, "q", "", SuggesterParams.SUGGEST_BUILD_ALL, "true"));
+  }
+
+  /** Removes the suggester index directory and its ".tmp" sibling, when present. */
+  @AfterClass
+  public static void afterClass() throws Exception {
+    for (String name : new String[] {"blendedInfixSuggesterIndexDir", "blendedInfixSuggesterIndexDir.tmp"}) {
+      File dir = new File(name);
+      assertTrue(!dir.exists() || recurseDelete(dir));
+    }
+  }
+
+  public void testLinearBlenderType() {
+    assertQ(req("qt", URI, "q", "the", SuggesterParams.SUGGEST_COUNT, "10", SuggesterParams.SUGGEST_DICT, LINEAR_DICT),
+        expectedXPaths(LINEAR_DICT, LINEAR_ROWS));
+  }
+
+  public void testReciprocalBlenderType() {
+    assertQ(req("qt", URI, "q", "the", SuggesterParams.SUGGEST_COUNT, "10", SuggesterParams.SUGGEST_DICT, RECIPROCAL_DICT),
+        expectedXPaths(RECIPROCAL_DICT, RECIPROCAL_ROWS));
+  }
+
+  /** Both suggesters queried in one request must each report their own results. */
+  public void testMultiSuggester() {
+    String[] linear = expectedXPaths(LINEAR_DICT, LINEAR_ROWS);
+    String[] reciprocal = expectedXPaths(RECIPROCAL_DICT, RECIPROCAL_ROWS);
+    String[] both = new String[linear.length + reciprocal.length];
+    System.arraycopy(linear, 0, both, 0, linear.length);
+    System.arraycopy(reciprocal, 0, both, linear.length, reciprocal.length);
+    assertQ(req("qt", URI, "q", "the", SuggesterParams.SUGGEST_COUNT, "10", SuggesterParams.SUGGEST_DICT, LINEAR_DICT, SuggesterParams.SUGGEST_DICT, RECIPROCAL_DICT), both);
+  }
+
+  /**
+   * Builds the XPath checks for one suggester's response to "the": the numFound
+   * count, then term, weight and payload of each expected row in order.
+   */
+  private static String[] expectedXPaths(String dict, String[][] rows) {
+    String base = "//lst[@name='suggest']/lst[@name='" + dict + "']/lst[@name='the']";
+    String[] xpaths = new String[1 + 3 * rows.length];
+    xpaths[0] = base + "/int[@name='numFound'][.='" + rows.length + "']";
+    for (int i = 0; i < rows.length; i++) {
+      String entry = base + "/arr[@name='suggestions']/lst[" + (i + 1) + "]";
+      xpaths[1 + 3 * i] = entry + "/str[@name='term'][.='" + rows[i][0] + "']";
+      xpaths[2 + 3 * i] = entry + "/long[@name='weight'][.='" + rows[i][1] + "']";
+      xpaths[3 + 3 * i] = entry + "/str[@name='payload'][.='" + rows[i][2] + "']";
+    }
+    return xpaths;
+  }
+}