LUCENE-10115: Add a fuzzy parsing extension point for custom query parsers

This commit adds the QueryParserBase::getFuzzyDistance protected method, which 
can be overridden by subclasses to provide customisation of how the similarity distance 
is determined. The default implementation retains the current behaviour.
This commit is contained in:
Chris Hegarty 2021-09-21 13:25:09 +01:00 committed by GitHub
parent b2a04a4bb4
commit a7578709a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 6 deletions

View File

@ -150,6 +150,9 @@ API Changes
optimization to use the points index to skip over non-competitive documents, optimization to use the points index to skip over non-competitive documents,
which is enabled by default from 9.0 (Mayya Sharipova, Adrien Grand) which is enabled by default from 9.0 (Mayya Sharipova, Adrien Grand)
* LUCENE-10115: Add an extension point, QueryParserBase#getFuzzyDistance, to allow custom
query parsers to determine the similarity distance for fuzzy queries. (Chris Hegarty)
Improvements Improvements
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss) * LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)

View File

@ -810,23 +810,38 @@ public abstract class QueryParserBase extends QueryBuilder
return q; return q;
} }
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException { /**
Query q; * Determines the similarity distance for the given fuzzy token and term string.
float fms = fuzzyMinSim; *
* <p>The default implementation uses the string image of the {@code fuzzyToken} in an attempt to
* parse it to a primitive float value. Otherwise, the {@linkplain #getFuzzyMinSim() minimal
* similarity} distance is returned. Subclasses can override this method to return a similarity
* distance, say based on the {@code termStr}, if the {@code fuzzyToken} does not specify a
* distance.
*
* @param fuzzyToken The Fuzzy token
* @param termStr The Term string
* @return The similarity distance
*/
protected float getFuzzyDistance(Token fuzzyToken, String termStr) {
try { try {
fms = Float.parseFloat(fuzzySlop.image.substring(1)); return Float.parseFloat(fuzzyToken.image.substring(1));
} catch ( } catch (
@SuppressWarnings("unused") @SuppressWarnings("unused")
Exception ignored) { Exception ignored) {
} }
return fuzzyMinSim;
}
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException {
float fms = getFuzzyDistance(fuzzySlop, termImage);
if (fms < 0.0f) { if (fms < 0.0f) {
throw new ParseException( throw new ParseException(
"Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
} else if (fms >= 1.0f && fms != (int) fms) { } else if (fms >= 1.0f && fms != (int) fms) {
throw new ParseException("Fractional edit distances are not allowed!"); throw new ParseException("Fractional edit distances are not allowed!");
} }
q = getFuzzyQuery(qfield, termImage, fms); return getFuzzyQuery(qfield, termImage, fms);
return q;
} }
// extracted from the .jj grammar // extracted from the .jj grammar

View File

@ -196,6 +196,34 @@ public class TestQueryParser extends QueryParserTestBase {
assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€")); assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
} }
/**
 * Checks that a parser subclass can customise the fuzzy distance by overriding {@code
 * getFuzzyDistance}.
 *
 * <p>Two anonymous {@code QueryParser} subclasses are exercised: one that falls back to a fixed
 * distance when the fuzzy token carries no parsable number, and one that derives the distance
 * from the length of the term string.
 */
public void testFuzzyDistanceExtendability() throws ParseException {
// Parser 1: same parsing attempt as the default implementation, but a different fallback.
QueryParser qp =
new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
@Override
protected float getFuzzyDistance(Token fuzzySlop, String termStr) {
try {
// Drop the first character of the token image (presumably the '~') and parse the rest.
return Float.parseFloat(fuzzySlop.image.substring(1));
} catch (
@SuppressWarnings("unused")
Exception ignored) {
// Deliberately ignored: an absent or unparsable distance falls through to the fallback.
}
return 1f; // alternative value to the default min similarity
}
};
// A bare "~" and a non-numeric suffix must both produce the same query as an explicit "~1".
assertEquals(qp.parse("term~"), qp.parse("term~1"));
assertEquals(qp.parse("term~XXX"), qp.parse("term~1"));
// Parser 2: ignores the fuzzy token entirely and uses only the term string.
QueryParser qp2 =
new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
@Override
protected float getFuzzyDistance(Token fuzzySlop, String termStr) {
return termStr.length(); // distance based on the term length
}
};
// One-character term -> distance 1, two-character term -> distance 2.
assertEquals(qp2.parse("a~"), qp2.parse("a~1"));
assertEquals(qp2.parse("ab~"), qp2.parse("ab~2"));
}
@Override @Override
public void testStarParsing() throws Exception { public void testStarParsing() throws Exception {
final int[] type = new int[1]; final int[] type = new int[1];