LUCENE-8343: change suggesters to use Long instead of long weight during indexing, and double instead of long score at suggest time

This commit is contained in:
Mike McCandless 2018-09-11 12:03:40 -04:00
commit 398074d0f8
4 changed files with 74 additions and 21 deletions

View File

@ -118,3 +118,8 @@ docId() and score() (LUCENE-6228)
If a custom Scorer implementation does not have an associated Weight, it can probably
be replaced with a Scorable instead.
## Suggesters now return Long instead of long for weight() during indexing, and double
instead of long at suggest time ##
Most code should only need to be recompiled, although some additional casts may be required.

View File

@ -224,7 +224,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
} else {
coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken);
}
if (weight == 0) {
weight = 1;
}
if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) {
weight *= 1 / LINEAR_COEF;
}
long score = (long) (weight * coefficient);
LookupResult result;

View File

@ -44,22 +44,58 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
* of the matching term.
*/
public void testBlendedSort() throws IOException {
BytesRef payload = new BytesRef("star");
Input keys[] = new Input[]{
new Input("star wars: episode v - the empire strikes back", 8, payload)
};
BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
Path tempDir = createTempDir("BlendedInfixSuggesterTest");
assertSuggestionsRanking(payload, suggester);
}
Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
suggester.build(new InputArrayIterator(keys));
/**
 * Checks that suggestions are still ranked by the position coefficient
 * when the single indexed suggestion carries a weight of exactly 1.
 */
public void testBlendedSort_fieldWeightUnitary_shouldRankSuggestionsByPositionMatch() throws IOException {
  final BytesRef starPayload = new BytesRef("star");
  final Input[] inputs = {
      new Input("star wars: episode v - the empire strikes back", 1, starPayload)
  };
  // Build the suggester over the single input and hand it straight to the shared ranking check.
  assertSuggestionsRanking(starPayload, getBlendedInfixSuggester(inputs));
}
/**
 * Checks that suggestions are still ranked by the position coefficient
 * when the single indexed suggestion carries a weight of 0.
 */
public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch() throws IOException {
  final BytesRef starPayload = new BytesRef("star");
  final Input[] inputs = {
      new Input("star wars: episode v - the empire strikes back", 0, starPayload)
  };
  // Build the suggester over the single input and hand it straight to the shared ranking check.
  assertSuggestionsRanking(starPayload, getBlendedInfixSuggester(inputs));
}
/**
 * Checks that suggestions are still ranked by the position coefficient
 * when the single indexed suggestion carries the largest possible weight
 * ({@code Long.MAX_VALUE}); the ranking must not be disturbed by overflow.
 */
public void testBlendedSort_fieldWeightLongMax_shouldRankSuggestionsByPositionMatchWithNoOverflow() throws IOException {
  final BytesRef starPayload = new BytesRef("star");
  final Input[] inputs = {
      new Input("star wars: episode v - the empire strikes back", Long.MAX_VALUE, starPayload)
  };
  // Build the suggester over the single input and hand it straight to the shared ranking check.
  assertSuggestionsRanking(starPayload, getBlendedInfixSuggester(inputs));
}
private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException {
// we query for star wars and check that the weight
// is smaller when we search for tokens that are far from the beginning
@ -78,6 +114,18 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
suggester.close();
}
/**
 * Creates a {@link BlendedInfixSuggester} in a fresh temporary directory and builds it
 * from the given inputs.
 *
 * <p>The suggester uses the {@code POSITION_LINEAR} blender type, the default minimum
 * prefix length and the default num factor. The same {@link StandardAnalyzer} instance
 * (constructed with {@code CharArraySet.EMPTY_SET}) is passed for both analyzer arguments.
 *
 * @param keys the inputs to build the suggester from
 * @return the built suggester
 * @throws IOException if creating the directory or building the suggester fails
 */
private BlendedInfixSuggester getBlendedInfixSuggester(Input[] keys) throws IOException {
  final Path indexDir = createTempDir("BlendedInfixSuggesterTest");
  final Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);

  final BlendedInfixSuggester built = new BlendedInfixSuggester(
      newFSDirectory(indexDir),
      analyzer,
      analyzer,
      AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
      BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
      BlendedInfixSuggester.DEFAULT_NUM_FACTOR,
      false);
  built.build(new InputArrayIterator(keys));
  return built;
}
/**
* Verify the different flavours of the blender types
*/
@ -195,14 +243,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
new Input("top of the lake", 8, payload)
};
Path tempDir = createTempDir("BlendedInfixSuggesterTest");
Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
suggester.build(new InputArrayIterator(keys));
BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
getInResults(suggester, "of ", payload, 1);
getInResults(suggester, "the ", payload, 1);

View File

@ -188,7 +188,7 @@ This implementation supports <<Context Filtering>>.
==== BlendedInfixLookupFactory
An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa.
An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. It scores higher if a hit is closer to the start of the suggestion.
This implementation uses the following additional properties:
@ -197,9 +197,11 @@ Used to calculate weight coefficient using the position of the first matching wo
`position_linear`:::
`weightFieldValue * (1 - 0.10*position)`: Matches closer to the start will be given a higher score. This is the default.
`position_reciprocal`:::
`weightFieldValue / (1 + position)`: Matches to the end will be given a higher score.
`weightFieldValue / (1 + position)`: Matches closer to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than with `position_linear`.
`position_exponential_reciprocal`:::
`weightFieldValue / pow(1 + position,exponent)`: Matches closer to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than with `position_reciprocal`.
`exponent`::::
An optional configuration variable for `position_reciprocal` to control how fast the score will increase or decrease. Default `2.0`.
An optional configuration variable for `position_exponential_reciprocal` to control how fast the score will decrease. Default `2.0`.
`numFactor`::
The factor to multiply the number of searched elements from which results will be pruned. Default is `10`.