LUCENE-8343: change suggesters to use Long instead of long weight during indexing, and double instead of long score at suggest time

2025-02-08 02:58:58 +00:00 · 2018-09-11 12:03:40 -04:00 · 2018-09-11 12:03:40 -04:00 · 398074d0f8
commit 398074d0f8
parent a619038e90 1a83a14668
4 changed files with 74 additions and 21 deletions
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@ -118,3 +118,8 @@ docId() and score() (LUCENE-6228)

 If a custom Scorer implementation does not have an associated Weight, it can probably
 be replaced with a Scorable instead.
+
+## Suggesters now return Long instead of long for weight() during indexing, and double
+instead of long at suggest time ##
+
+Most code should only require recompilation, though some added casts may be needed.
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
@ -224,7 +224,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
      } else {
        coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken);
      }
-
+      if (weight == 0) {
+        weight = 1;
+      }
+      if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) {
+        weight *= 1 / LINEAR_COEF;
+      }
      long score = (long) (weight * coefficient);

      LookupResult result;
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
@ -44,22 +44,58 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
   * of the matching term.
   */
  public void testBlendedSort() throws IOException {
-
    BytesRef payload = new BytesRef("star");
-
    Input keys[] = new Input[]{
        new Input("star wars: episode v - the empire strikes back", 8, payload)
    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);

-    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
+    assertSuggestionsRanking(payload, suggester);
+  }

-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
-                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-                                                                BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
-    suggester.build(new InputArrayIterator(keys));
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated with the suggestion is unitary.
+   */
+  public void testBlendedSort_fieldWeightUnitary_shouldRankSuggestionsByPositionMatch() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", 1, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);

+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated with the suggestion is zero.
+   */
+  public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", 0, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
+
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated with the suggestion is very big; no overflow should happen.
+   */
+  public void testBlendedSort_fieldWeightLongMax_shouldRankSuggestionsByPositionMatchWithNoOverflow() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", Long.MAX_VALUE, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
+
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException {
    // we query for star wars and check that the weight
    // is smaller when we search for tokens that are far from the beginning

@ -78,6 +114,18 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
    suggester.close();
  }

+  private BlendedInfixSuggester getBlendedInfixSuggester(Input[] keys) throws IOException {
+    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
+
+    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
+        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
+        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
+        BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
+    suggester.build(new InputArrayIterator(keys));
+    return suggester;
+  }
+
  /**
   * Verify the different flavours of the blender types
   */
@ -195,14 +243,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
        new Input("top of the lake", 8, payload)
    };

-    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
-
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
-                                                                AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-                                                                BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-                                                                BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
-    suggester.build(new InputArrayIterator(keys));
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);

    getInResults(suggester, "of ", payload, 1);
    getInResults(suggester, "the ", payload, 1);
--- a/solr/solr-ref-guide/src/suggester.adoc
+++ b/solr/solr-ref-guide/src/suggester.adoc
@ -188,7 +188,7 @@ This implementation supports <<Context Filtering>>.

 ==== BlendedInfixLookupFactory

-An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa.
+An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. It scores higher if a hit is closer to the start of the suggestion.

 This implementation uses the following additional properties:

@ -197,9 +197,11 @@ Used to calculate weight coefficient using the position of the first matching wo
 `position_linear`:::
 `weightFieldValue * (1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default.
 `position_reciprocal`:::
-`weightFieldValue / (1 + position)`: Matches to the end will be given a higher score.
+`weightFieldValue / (1 + position)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than linear.
+`position_exponential_reciprocal`:::
+`weightFieldValue / pow(1 + position, exponent)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than reciprocal.
 `exponent`::::
-An optional configuration variable for `position_reciprocal` to control how fast the score will increase or decrease. Default `2.0`.
+An optional configuration variable for `position_exponential_reciprocal` to control how fast the score will decrease. Default `2.0`.

 `numFactor`::
 The factor to multiply the number of searched elements from which results will be pruned. Default is `10`.