diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index db4164f9f9e..8dc52f5dc3b 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -128,7 +128,7 @@ public class MapperQueryParser extends QueryParser { setLowercaseExpandedTerms(settings.lowercaseExpandedTerms()); setPhraseSlop(settings.phraseSlop()); setDefaultOperator(settings.defaultOperator()); - setFuzzyMinSim(settings.fuzzyMinSim()); + setFuzzyMinSim(settings.getFuzziness().asFloat()); setFuzzyPrefixLength(settings.fuzzyPrefixLength()); setLocale(settings.locale()); this.analyzeWildcard = settings.analyzeWildcard(); diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java index ca364e486e1..e079e00303e 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.unit.Fuzziness; import org.joda.time.DateTimeZone; import java.util.Collection; @@ -49,7 +50,7 @@ public class QueryParserSettings { private boolean lowercaseExpandedTerms = true; private boolean enablePositionIncrements = true; private int phraseSlop = 0; - private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + private Fuzziness fuzziness = Fuzziness.AUTO; private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; private int fuzzyMaxExpansions = FuzzyQuery.defaultMaxExpansions; private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; @@ -158,14 +159,6 @@ public class QueryParserSettings { this.phraseSlop = phraseSlop; } - public float fuzzyMinSim() { - return fuzzyMinSim; - } - - public void fuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; - } - public int fuzzyPrefixLength() { return fuzzyPrefixLength; } @@ -340,7 +333,7 @@ public class QueryParserSettings { if (enablePositionIncrements != that.enablePositionIncrements) return false; if (escape != that.escape) return false; if (analyzeWildcard != that.analyzeWildcard) return false; - if (Float.compare(that.fuzzyMinSim, fuzzyMinSim) != 0) return false; + if (fuzziness != null ? fuzziness.equals(that.fuzziness) == false : fuzziness != null) return false; if (fuzzyPrefixLength != that.fuzzyPrefixLength) return false; if (fuzzyMaxExpansions != that.fuzzyMaxExpansions) return false; if (fuzzyRewriteMethod != null ? !fuzzyRewriteMethod.equals(that.fuzzyRewriteMethod) : that.fuzzyRewriteMethod != null) @@ -395,7 +388,7 @@ public class QueryParserSettings { result = 31 * result + (lowercaseExpandedTerms ? 1 : 0); result = 31 * result + (enablePositionIncrements ? 1 : 0); result = 31 * result + phraseSlop; - result = 31 * result + (fuzzyMinSim != +0.0f ? Float.floatToIntBits(fuzzyMinSim) : 0); + result = 31 * result + (fuzziness.hashCode()); result = 31 * result + fuzzyPrefixLength; result = 31 * result + (escape ? 1 : 0); result = 31 * result + (defaultAnalyzer != null ? defaultAnalyzer.hashCode() : 0); @@ -413,4 +406,12 @@ public class QueryParserSettings { result = 31 * result + (timeZone != null ? timeZone.hashCode() : 0); return result; } + + public void setFuzziness(Fuzziness fuzziness) { + this.fuzziness = fuzziness; + } + + public Fuzziness getFuzziness() { + return fuzziness; + } } diff --git a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java index 30b959d25b6..cfcd209b435 100644 --- a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java +++ b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java @@ -43,29 +43,17 @@ public final class Fuzziness implements ToXContent { public static final Fuzziness AUTO = new Fuzziness("AUTO"); public static final ParseField FIELD = new ParseField(X_FIELD_NAME.camelCase().getValue()); - private final Object fuzziness; + private final String fuzziness; private Fuzziness(int fuzziness) { Preconditions.checkArgument(fuzziness >= 0 && fuzziness <= 2, "Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]"); - this.fuzziness = fuzziness; - } - - private Fuzziness(float fuzziness) { - Preconditions.checkArgument(fuzziness >= 0.0 && fuzziness < 1.0f, "Valid similarities must be in the interval [0..1] but was [" + fuzziness + "]"); - this.fuzziness = fuzziness; + this.fuzziness = Integer.toString(fuzziness); } private Fuzziness(String fuzziness) { this.fuzziness = fuzziness; } - /** - * Creates a {@link Fuzziness} instance from a similarity. The value must be in the range [0..1) - */ - public static Fuzziness fromSimilarity(float similarity) { - return new Fuzziness(similarity); - } - /** * Creates a {@link Fuzziness} instance from an edit distance. The value must be one of [0, 1, 2] */ @@ -133,19 +121,17 @@ public final class Fuzziness implements ToXContent { } public int asDistance(String text) { - if (fuzziness instanceof String) { - if (this == AUTO) { //AUTO - final int len = termLen(text); - if (len <= 2) { - return 0; - } else if (len > 5) { - return 2; - } else { - return 1; - } + if (this == AUTO) { //AUTO + final int len = termLen(text); + if (len <= 2) { + return 0; + } else if (len > 5) { + return 2; + } else { + return 1; } } - return FuzzyQuery.floatToEdits(asFloat(), termLen(text)); + return Math.min(2, asInt()); } public TimeValue asTimeValue() { @@ -214,37 +200,6 @@ public final class Fuzziness implements ToXContent { return Float.parseFloat(fuzziness.toString()); } - public float asSimilarity() { - return asSimilarity(null); - } - - public float asSimilarity(String text) { - if (this == AUTO) { - final int len = termLen(text); - if (len <= 2) { - return 0.0f; - } else if (len > 5) { - return 0.5f; - } else { - return 0.66f; - } -// return dist == 0 ? dist : Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist/ (float) termLen(text)))); - } - if (fuzziness instanceof Float) { // it's a similarity - return ((Float) fuzziness).floatValue(); - } else if (fuzziness instanceof Integer) { // it's an edit! - int dist = Math.min(((Integer) fuzziness).intValue(), - LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); - return Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist / (float) termLen(text)))); - } else { - final float similarity = Float.parseFloat(fuzziness.toString()); - if (similarity >= 0.0f && similarity < 1.0f) { - return similarity; - } - } - throw new IllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]"); - } - private int termLen(String text) { return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english } diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 752037b9b21..2a21d3d4595 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -179,7 +179,7 @@ public class QueryStringQueryParser implements QueryParser { } else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { qpSettings.phraseSlop(parser.intValue()); } else if (parseContext.parseFieldMatcher().match(currentFieldName, FUZZINESS)) { - qpSettings.fuzzyMinSim(Fuzziness.parse(parser).asSimilarity()); + qpSettings.setFuzziness(Fuzziness.parse(parser)); } else if ("boost".equals(currentFieldName)) { qpSettings.boost(parser.floatValue()); } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) { diff --git a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java index 74ed24a5ec4..dc3c66b4e83 100644 --- a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java +++ b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.common.unit; -import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.common.xcontent.XContent; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; @@ -143,16 +142,6 @@ public class FuzzinessTests extends ElasticsearchTestCase { public void testAuto() { final int codePoints = randomIntBetween(0, 10); String string = randomRealisticUnicodeOfCodepointLength(codePoints); - if (codePoints <= 2) { - assertThat(Fuzziness.AUTO.asDistance(string), equalTo(0)); - assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(0)); - } else if (codePoints > 5) { - assertThat(Fuzziness.AUTO.asDistance(string), equalTo(2)); - assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(2)); - } else { - assertThat(Fuzziness.AUTO.asDistance(string), equalTo(1)); - assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(1)); - } assertThat(Fuzziness.AUTO.asByte(), equalTo((byte) 1)); assertThat(Fuzziness.AUTO.asInt(), equalTo(1)); assertThat(Fuzziness.AUTO.asFloat(), equalTo(1f)); @@ -173,28 +162,4 @@ public class FuzzinessTests extends ElasticsearchTestCase { } } - @Test - @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/10638") - public void testSimilarityToDistance() { - assertThat(Fuzziness.fromSimilarity(0.5f).asDistance("ab"), equalTo(1)); - assertThat(Fuzziness.fromSimilarity(0.66f).asDistance("abcefg"), equalTo(2)); - assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("ab"), equalTo(0)); - assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("abcefg"), equalTo(1)); - assertThat((double) Fuzziness.ONE.asSimilarity("abcefg"), closeTo(0.8f, 0.05)); - assertThat((double) Fuzziness.TWO.asSimilarity("abcefg"), closeTo(0.66f, 0.05)); - assertThat((double) Fuzziness.ONE.asSimilarity("ab"), closeTo(0.5f, 0.05)); - - int iters = randomIntBetween(100, 1000); - for (int i = 0; i < iters; i++) { - Fuzziness fuzziness = Fuzziness.fromEdits(between(1, 2)); - String string = rarely() ? randomRealisticUnicodeOfLengthBetween(2, 4) : - randomRealisticUnicodeOfLengthBetween(4, 10); - float similarity = fuzziness.asSimilarity(string); - if (similarity != 0.0f) { - Fuzziness similarityBased = Fuzziness.build(similarity); - assertThat((double) similarityBased.asSimilarity(string), closeTo(similarity, 0.05)); - assertThat(similarityBased.asDistance(string), equalTo(Math.min(2, fuzziness.asDistance(string)))); - } - } - } } diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index e8c73dfd0a9..b89c330dde8 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -437,7 +437,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest { @Test public void testFuzzyQueryWithFieldsBuilder() throws IOException { IndexQueryParserService queryParser = queryParser(); - Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.fromSimilarity(0.1f)).prefixLength(1).boost(2.0f).buildAsBytes()).query(); + Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.ONE).prefixLength(1).boost(2.0f).buildAsBytes()).query(); assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); @@ -454,7 +454,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest { assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); - assertThat(fuzzyQuery.getMaxEdits(), equalTo(FuzzyQuery.floatToEdits(0.1f, "sh".length()))); + assertThat(fuzzyQuery.getMaxEdits(), equalTo(Fuzziness.AUTO.asDistance("sh"))); assertThat(fuzzyQuery.getPrefixLength(), equalTo(1)); assertThat(fuzzyQuery.getBoost(), equalTo(2.0f)); } diff --git a/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json b/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json index 3e3d30ffdc0..7636496adc4 100644 --- a/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json +++ b/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json @@ -2,7 +2,7 @@ "fuzzy":{ "name.first":{ "value":"sh", - "fuzziness":0.1, + "fuzziness": "AUTO", "prefix_length":1, "boost":2.0 } diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index 7dfb3936e35..d54e93a9dc0 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -331,15 +331,6 @@ generates an edit distance based on the length of the term. For lengths: `>5`:: two edits allowed `AUTO` should generally be the preferred value for `fuzziness`. --- - -`0.0..1.0`:: - -converted into an edit distance using the formula: `length(term) * (1.0 - -fuzziness)`, eg a `fuzziness` of `0.6` with a term of length 10 would result -in an edit distance of `4`. Note: in all APIs the maximum allowed edit distance is `2`. - - [float] === Result Casing