mirror of https://github.com/apache/lucene.git
LUCENE-8343: change suggesters to use Long instead of long weight during indexing, and double instead of long score at suggest time
commit 398074d0f8
@@ -118,3 +118,8 @@ docId() and score() (LUCENE-6228)
 If a custom Scorer implementation does not have an associated Weight, it can probably
 be replaced with a Scorable instead.
 
+## Suggesters now return Long instead of long for weight() during indexing, and double
+instead of long at suggest time ##
+
+Most code should just require recompilation, though possibly requiring some added casts.
+
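As a rough illustration of the casts this migration note anticipates, the sketch below uses invented class and method names, not the real Lucene signatures: a weight that used to be a primitive long is now handled as a boxed Long, and a suggest-time score that used to be a long arrives as a double, so callers that still need an integral value have to narrow it explicitly.

// Hypothetical sketch only -- the types and methods are invented for illustration
// and are not the actual Lucene API; it shows where recompiling alone is not enough.
public class SuggesterMigrationSketch {

  // Indexing side: a weight that used to be declared as a primitive long.
  static Long indexingWeight() {   // was: static long indexingWeight()
    return 8L;                     // autoboxing keeps most call sites source-compatible
  }

  // Suggest side: a score that used to be a long is now a double.
  static double suggestScore(Long weight, double positionCoefficient) {
    return weight * positionCoefficient;
  }

  public static void main(String[] args) {
    long w = indexingWeight();                           // implicit unboxing, no cast needed
    double score = suggestScore(indexingWeight(), 0.9);  // score is now fractional
    long truncated = (long) score;                       // the kind of added cast the note mentions
    System.out.println(w + " " + score + " " + truncated);
  }
}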
@@ -224,7 +224,12 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
         } else {
           coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken);
         }
-
+        if (weight == 0) {
+          weight = 1;
+        }
+        if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) {
+          weight *= 1 / LINEAR_COEF;
+        }
         long score = (long) (weight * coefficient);
 
         LookupResult result;
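The two added if blocks above keep small weights from being wiped out by the (long) cast of the score. Here is a standalone sketch of the effect, assuming LINEAR_COEF is the 0.10 position step documented for position_linear (an assumption; the constant's value is not shown in this hunk):

// Standalone sketch, not Lucene code: shows why a unitary weight is scaled by
// 1 / LINEAR_COEF before the score is narrowed to a long.
public class WeightScalingSketch {
  private static final double LINEAR_COEF = 0.10; // assumed to match the suggester's constant

  public static void main(String[] args) {
    long weight = 1; // the "fieldWeightUnitary" case exercised by the new tests
    for (int position = 0; position < 4; position++) {
      double coefficient = 1 - LINEAR_COEF * position;                  // position_linear coefficient
      long unscaled = (long) (weight * coefficient);                    // 1, 0, 0, 0 -> later positions tie at 0
      long scaled = (long) (weight * (1 / LINEAR_COEF) * coefficient);  // 10, 9, 8, 7 -> ordering by position survives
      System.out.println("position=" + position + " unscaled=" + unscaled + " scaled=" + scaled);
    }
  }
}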
@@ -44,22 +44,58 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
    * of the matching term.
    */
   public void testBlendedSort() throws IOException {
     BytesRef payload = new BytesRef("star");
     Input keys[] = new Input[]{
         new Input("star wars: episode v - the empire strikes back", 8, payload)
     };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
 
-    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
-
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
-    suggester.build(new InputArrayIterator(keys));
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated to the suggestion is unitary.
+   */
+  public void testBlendedSort_fieldWeightUnitary_shouldRankSuggestionsByPositionMatch() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", 1, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
+
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated to the suggestion is zero.
+   */
+  public void testBlendedSort_fieldWeightZero_shouldRankSuggestionsByPositionMatch() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", 0, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
+
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  /**
+   * Test to validate the suggestions ranking according to the position coefficient,
+   * even if the weight associated to the suggestion is very big, no overflow should happen.
+   */
+  public void testBlendedSort_fieldWeightLongMax_shouldRankSuggestionsByPositionMatchWithNoOverflow() throws IOException {
+    BytesRef payload = new BytesRef("star");
+    Input keys[] = new Input[]{
+        new Input("star wars: episode v - the empire strikes back", Long.MAX_VALUE, payload)
+    };
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
+
+    assertSuggestionsRanking(payload, suggester);
+  }
+
+  private void assertSuggestionsRanking(BytesRef payload, BlendedInfixSuggester suggester) throws IOException {
     // we query for star wars and check that the weight
     // is smaller when we search for tokens that are far from the beginning
@@ -78,6 +114,18 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
     suggester.close();
   }
 
+  private BlendedInfixSuggester getBlendedInfixSuggester(Input[] keys) throws IOException {
+    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
+
+    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
+    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
+        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
+        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
+        BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
+    suggester.build(new InputArrayIterator(keys));
+    return suggester;
+  }
+
   /**
    * Verify the different flavours of the blender types
    */
@@ -195,14 +243,7 @@ public class BlendedInfixSuggesterTest extends LuceneTestCase {
         new Input("top of the lake", 8, payload)
     };
 
-    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
-
-    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
-    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
-        AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
-        BlendedInfixSuggester.BlenderType.POSITION_LINEAR,
-        BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
-    suggester.build(new InputArrayIterator(keys));
+    BlendedInfixSuggester suggester = getBlendedInfixSuggester(keys);
 
     getInResults(suggester, "of ", payload, 1);
     getInResults(suggester, "the ", payload, 1);
@@ -188,7 +188,7 @@ This implementation supports <<Context Filtering>>.
 
 ==== BlendedInfixLookupFactory
 
-An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. You can tell it to score higher if a hit is closer to the start of the suggestion or vice versa.
+An extension of the `AnalyzingInfixSuggester` which provides additional functionality to weight prefix matches across the matched documents. It scores higher if a hit is closer to the start of the suggestion.
 
 This implementation uses the following additional properties:
@@ -197,9 +197,11 @@ Used to calculate weight coefficient using the position of the first matching word
 `position_linear`:::
 `weightFieldValue * (1 - 0.10*position)`: Matches to the start will be given a higher score. This is the default.
 `position_reciprocal`:::
-`weightFieldValue / (1 + position)`: Matches to the end will be given a higher score.
+`weightFieldValue / (1 + position)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than linear.
+`position_exponential_reciprocal`:::
+`weightFieldValue / pow(1 + position,exponent)`: Matches to the start will be given a higher score. The score of matches positioned far from the start of the suggestion decays faster than reciprocal.
 `exponent`::::
-An optional configuration variable for `position_reciprocal` to control how fast the score will increase or decrease. Default `2.0`.
+An optional configuration variable for `position_exponential_reciprocal` to control how fast the score will decrease. Default `2.0`.
 
 `numFactor`::
 The factor to multiply the number of searched elements from which results will be pruned. Default is `10`.
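To give a concrete feel for the three decay curves described in the documentation change above, here is a small standalone sketch (not Solr or Lucene code) that evaluates the documented formulas for a fixed weightFieldValue and the default exponent of 2.0:

// Evaluates the three documented blender formulas to show how quickly each one
// decays as the first matching word moves away from the start of the suggestion.
public class BlenderTypeSketch {
  public static void main(String[] args) {
    double weightFieldValue = 100;
    double exponent = 2.0; // documented default for position_exponential_reciprocal
    for (int position = 0; position <= 4; position++) {
      double linear = weightFieldValue * (1 - 0.10 * position);            // gentle, linear decay
      double reciprocal = weightFieldValue / (1 + position);               // faster than linear
      double expReciprocal = weightFieldValue / Math.pow(1 + position, exponent); // faster than reciprocal
      System.out.printf("pos=%d linear=%.1f reciprocal=%.1f exponential_reciprocal=%.1f%n",
          position, linear, reciprocal, expReciprocal);
    }
  }
}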