diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 90ed4fc7baf..e0e70a36ccf 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -118,3 +118,8 @@ docId() and score() (LUCENE-6228) If a custom Scorer implementation does not have an associated Weight, it can probably be replaced with a Scorable instead. + +## Suggesters now return Long instead of long for weight() during indexing, and double +instead of long at suggest time ## + +Most code should just require recompilation, though possibly requiring some added casts. diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index 413d401b6a5..63f432fb76a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -224,7 +224,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester { } else { coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken); } - + if (weight == 0) { + weight = 1; + } + if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) { + weight *= 1 / LINEAR_COEF; + } long score = (long) (weight * coefficient); LookupResult result; diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java index ace44678957..296e40452d2 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java @@ -44,22 +44,58 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { * of the matching term. 
*/ public void testBlendedSort() throws IOException { - BytesRef payload = new BytesRef("star"); - Input keys[] = new Input[]{ new Input("star wars: episode v - the empire strikes back", 8, payload) }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); - Path tempDir = createTempDir("BlendedInfixSuggesterTest"); + assertSuggestionsRanking(payload, suggester); + } - Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); - BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, - AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, - BlendedInfixSuggester.BlenderType.POSITION_LINEAR, - BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); - suggester.build(new InputArrayIterator(keys)); + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is unitary. + */ + public void testBlendedSort_fieldWeightUnitary_shouldRankSuggestionsByPositionMatch() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", 1, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + assertSuggestionsRanking(payload, suggester); + } + + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is zero. 
+ */ + public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", 0, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + + assertSuggestionsRanking(payload, suggester); + } + + /** + * Test to validate the suggestions ranking according to the position coefficient, + * even if the weight associated to the suggestion is very big, no overflow should happen. + */ + public void testBlendedSort_fieldWeightLongMax_shouldRankSuggestionsByPositionMatchWithNoOverflow() throws IOException { + BytesRef payload = new BytesRef("star"); + Input keys[] = new Input[]{ + new Input("star wars: episode v - the empire strikes back", Long.MAX_VALUE, payload) + }; + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); + + assertSuggestionsRanking(payload, suggester); + } + + private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException { // we query for star wars and check that the weight // is smaller when we search for tokens that are far from the beginning @@ -78,6 +114,18 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase { suggester.close(); } + private BlendedInfixSuggester getBlendedInfixSuggester(Input[] keys) throws IOException { + Path tempDir = createTempDir("BlendedInfixSuggesterTest"); + + Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); + BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, + AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, + BlendedInfixSuggester.BlenderType.POSITION_LINEAR, + BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); + suggester.build(new InputArrayIterator(keys)); + return suggester; + } + /** * Verify the different flavours of the blender types */ @@ -195,14 +243,7 @@ public class BlendedInfixSuggesterTest extends 
LuceneTestCase { new Input("top of the lake", 8, payload) }; - Path tempDir = createTempDir("BlendedInfixSuggesterTest"); - - Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); - BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, - AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, - BlendedInfixSuggester.BlenderType.POSITION_LINEAR, - BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false); - suggester.build(new InputArrayIterator(keys)); + BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys); getInResults(suggester, "of ", payload, 1); getInResults(suggester, "the ", payload, 1); diff --git a/solr/solr-ref-guide/src/suggester.adoc b/solr/solr-ref-guide/src/suggester.adoc index 7b141b48a36..95fde7835f0 100644 --- a/solr/solr-ref-guide/src/suggester.adoc +++ b/solr/solr-ref-guide/src/suggester.adoc @@ -188,7 +188,7 @@ This implementation supports <>. ==== BlendedInfixLookupFactory -An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa. +An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. It scores higher if a hit is closer to the start of the suggestion. This implementation uses the following additional properties: @@ -197,9 +197,11 @@ Used to calculate weight coefficient using the position of the first matching wo `position_linear`::: `weightFieldValue * (1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default. `position_reciprocal`::: -`weightFieldValue / (1 + position)`: Matches to the end will be given a higher score. +`weightFieldValue / (1 + position)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than linear. 
+`position_exponential_reciprocal`::: +`weightFieldValue / pow(1 + position, exponent)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than with `position_reciprocal` (for `exponent` > 1). `exponent`:::: -An optional configuration variable for `position_reciprocal` to control how fast the score will increase or decrease. Default `2.0`. +An optional configuration variable for `position_exponential_reciprocal` to control how fast the score will decrease. Default `2.0`. `numFactor`:: The factor to multiply the number of searched elements from which results will be pruned. Default is `10`.