mirror of https://github.com/apache/lucene.git
LUCENE-10115: Add a fuzzy parsing extension point for custom query parsers
This commit adds the QueryParserBase::getFuzzyDistance protected method, which can be overridden by subclasses to provide customisation of how the similarity distance is determined. The default implementation retains the current behaviour.
This commit is contained in:
parent
b2a04a4bb4
commit
a7578709a6
|
@ -150,6 +150,9 @@ API Changes
|
|||
optimization to use the points index to skip over non-competitive documents,
|
||||
which is enabled by default from 9.0 (Mayya Sharipova, Adrien Grand)
|
||||
|
||||
* LUCENE-10115: Add an extension point, QueryParserBase#getFuzzyDistance, to allow custom
|
||||
query parsers to determine the similarity distance for fuzzy queries. (Chris Hegarty)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
|
||||
|
|
|
@ -810,23 +810,38 @@ public abstract class QueryParserBase extends QueryBuilder
|
|||
return q;
|
||||
}
|
||||
|
||||
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
|
||||
Query q;
|
||||
float fms = fuzzyMinSim;
|
||||
/**
|
||||
* Determines the similarity distance for the given fuzzy token and term string.
|
||||
*
|
||||
* <p>The default implementation uses the string image of the {@code fuzzyToken} in an attempt to
|
||||
* parse it to a primitive float value. Otherwise, the {@linkplain #getFuzzyMinSim() minimal
|
||||
* similarity} distance is returned. Subclasses can override this method to return a similarity
|
||||
* distance, say based on the {@code termStr}, if the {@code fuzzyToken} does not specify a
|
||||
* distance.
|
||||
*
|
||||
* @param fuzzyToken The Fuzzy token
|
||||
* @param termStr The Term string
|
||||
* @return The similarity distance
|
||||
*/
|
||||
protected float getFuzzyDistance(Token fuzzyToken, String termStr) {
|
||||
try {
|
||||
fms = Float.parseFloat(fuzzySlop.image.substring(1));
|
||||
return Float.parseFloat(fuzzyToken.image.substring(1));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Exception ignored) {
|
||||
}
|
||||
return fuzzyMinSim;
|
||||
}
|
||||
|
||||
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
|
||||
float fms = getFuzzyDistance(fuzzySlop, termImage);
|
||||
if (fms < 0.0f) {
|
||||
throw new ParseException(
|
||||
"Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
|
||||
} else if (fms >= 1.0f && fms != (int) fms) {
|
||||
throw new ParseException("Fractional edit distances are not allowed!");
|
||||
}
|
||||
q = getFuzzyQuery(qfield, termImage, fms);
|
||||
return q;
|
||||
return getFuzzyQuery(qfield, termImage, fms);
|
||||
}
|
||||
|
||||
// extracted from the .jj grammar
|
||||
|
|
|
@ -196,6 +196,34 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||
assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
|
||||
}
|
||||
|
||||
public void testFuzzyDistanceExtendability() throws ParseException {
|
||||
QueryParser qp =
|
||||
new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||
@Override
|
||||
protected float getFuzzyDistance(Token fuzzySlop, String termStr) {
|
||||
try {
|
||||
return Float.parseFloat(fuzzySlop.image.substring(1));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Exception ignored) {
|
||||
}
|
||||
return 1f; // alternative value to the default min similarity
|
||||
}
|
||||
};
|
||||
assertEquals(qp.parse("term~"), qp.parse("term~1"));
|
||||
assertEquals(qp.parse("term~XXX"), qp.parse("term~1"));
|
||||
|
||||
QueryParser qp2 =
|
||||
new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
|
||||
@Override
|
||||
protected float getFuzzyDistance(Token fuzzySlop, String termStr) {
|
||||
return termStr.length(); // distance based on the term length
|
||||
}
|
||||
};
|
||||
assertEquals(qp2.parse("a~"), qp2.parse("a~1"));
|
||||
assertEquals(qp2.parse("ab~"), qp2.parse("ab~2"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testStarParsing() throws Exception {
|
||||
final int[] type = new int[1];
|
||||
|
|
Loading…
Reference in New Issue