diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
index db4164f9f9e..8dc52f5dc3b 100644
--- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
+++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
@@ -128,7 +128,7 @@ public class MapperQueryParser extends QueryParser {
setLowercaseExpandedTerms(settings.lowercaseExpandedTerms());
setPhraseSlop(settings.phraseSlop());
setDefaultOperator(settings.defaultOperator());
- setFuzzyMinSim(settings.fuzzyMinSim());
+ setFuzzyMinSim(settings.getFuzziness().asFloat());
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
setLocale(settings.locale());
this.analyzeWildcard = settings.analyzeWildcard();
diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java
index ca364e486e1..e079e00303e 100644
--- a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java
+++ b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.automaton.Operations;
+import org.elasticsearch.common.unit.Fuzziness;
import org.joda.time.DateTimeZone;
import java.util.Collection;
@@ -49,7 +50,7 @@ public class QueryParserSettings {
private boolean lowercaseExpandedTerms = true;
private boolean enablePositionIncrements = true;
private int phraseSlop = 0;
- private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
+ private Fuzziness fuzziness = Fuzziness.AUTO;
private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
private int fuzzyMaxExpansions = FuzzyQuery.defaultMaxExpansions;
private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
@@ -158,14 +159,6 @@ public class QueryParserSettings {
this.phraseSlop = phraseSlop;
}
- public float fuzzyMinSim() {
- return fuzzyMinSim;
- }
-
- public void fuzzyMinSim(float fuzzyMinSim) {
- this.fuzzyMinSim = fuzzyMinSim;
- }
-
public int fuzzyPrefixLength() {
return fuzzyPrefixLength;
}
@@ -340,7 +333,7 @@ public class QueryParserSettings {
if (enablePositionIncrements != that.enablePositionIncrements) return false;
if (escape != that.escape) return false;
if (analyzeWildcard != that.analyzeWildcard) return false;
- if (Float.compare(that.fuzzyMinSim, fuzzyMinSim) != 0) return false;
+ if (fuzziness != null ? fuzziness.equals(that.fuzziness) == false : that.fuzziness != null) return false; // null-safe: a null fuzziness is only equal to another null
if (fuzzyPrefixLength != that.fuzzyPrefixLength) return false;
if (fuzzyMaxExpansions != that.fuzzyMaxExpansions) return false;
if (fuzzyRewriteMethod != null ? !fuzzyRewriteMethod.equals(that.fuzzyRewriteMethod) : that.fuzzyRewriteMethod != null)
@@ -395,7 +388,7 @@ public class QueryParserSettings {
result = 31 * result + (lowercaseExpandedTerms ? 1 : 0);
result = 31 * result + (enablePositionIncrements ? 1 : 0);
result = 31 * result + phraseSlop;
- result = 31 * result + (fuzzyMinSim != +0.0f ? Float.floatToIntBits(fuzzyMinSim) : 0);
+ result = 31 * result + (fuzziness != null ? fuzziness.hashCode() : 0); // null-guarded like the other object fields; setFuzziness does no null check
result = 31 * result + fuzzyPrefixLength;
result = 31 * result + (escape ? 1 : 0);
result = 31 * result + (defaultAnalyzer != null ? defaultAnalyzer.hashCode() : 0);
@@ -413,4 +406,12 @@ public class QueryParserSettings {
result = 31 * result + (timeZone != null ? timeZone.hashCode() : 0);
return result;
}
+ /** Replaces the stored fuzziness; the value is assigned as-is, without a null check. */
+ public void setFuzziness(Fuzziness fuzziness) {
+ this.fuzziness = fuzziness;
+ }
+ /** Returns the stored fuzziness: the field's initial value unless {@link #setFuzziness(Fuzziness)} replaced it. */
+ public Fuzziness getFuzziness() {
+ return fuzziness;
+ }
}
diff --git a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java
index 30b959d25b6..cfcd209b435 100644
--- a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java
+++ b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java
@@ -43,29 +43,17 @@ public final class Fuzziness implements ToXContent {
public static final Fuzziness AUTO = new Fuzziness("AUTO");
public static final ParseField FIELD = new ParseField(X_FIELD_NAME.camelCase().getValue());
- private final Object fuzziness;
+ private final String fuzziness;
private Fuzziness(int fuzziness) {
Preconditions.checkArgument(fuzziness >= 0 && fuzziness <= 2, "Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]");
- this.fuzziness = fuzziness;
- }
-
- private Fuzziness(float fuzziness) {
- Preconditions.checkArgument(fuzziness >= 0.0 && fuzziness < 1.0f, "Valid similarities must be in the interval [0..1] but was [" + fuzziness + "]");
- this.fuzziness = fuzziness;
+ this.fuzziness = Integer.toString(fuzziness); // the field is a String, so the validated edit distance is kept in decimal text form
}
private Fuzziness(String fuzziness) {
this.fuzziness = fuzziness;
}
- /**
- * Creates a {@link Fuzziness} instance from a similarity. The value must be in the range [0..1)
- */
- public static Fuzziness fromSimilarity(float similarity) {
- return new Fuzziness(similarity);
- }
-
/**
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of [0, 1, 2]
*/
@@ -133,19 +121,17 @@ public final class Fuzziness implements ToXContent {
}
public int asDistance(String text) {
- if (fuzziness instanceof String) {
- if (this == AUTO) { //AUTO
- final int len = termLen(text);
- if (len <= 2) {
- return 0;
- } else if (len > 5) {
- return 2;
- } else {
- return 1;
- }
+ if (this == AUTO) { // AUTO: the edit distance is derived from the term length
+ final int len = termLen(text); // length as computed by termLen
+ if (len <= 2) {
+ return 0; // length of 2 or less: no edits allowed
+ } else if (len > 5) {
+ return 2; // length above 5: two edits allowed
+ } else {
+ return 1; // length 3 to 5: one edit allowed
+ }
}
- return FuzzyQuery.floatToEdits(asFloat(), termLen(text));
+ return Math.min(2, asInt()); // any non-AUTO instance: asInt(), capped at 2
}
public TimeValue asTimeValue() {
@@ -214,37 +200,6 @@ public final class Fuzziness implements ToXContent {
return Float.parseFloat(fuzziness.toString());
}
- public float asSimilarity() {
- return asSimilarity(null);
- }
-
- public float asSimilarity(String text) {
- if (this == AUTO) {
- final int len = termLen(text);
- if (len <= 2) {
- return 0.0f;
- } else if (len > 5) {
- return 0.5f;
- } else {
- return 0.66f;
- }
-// return dist == 0 ? dist : Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist/ (float) termLen(text))));
- }
- if (fuzziness instanceof Float) { // it's a similarity
- return ((Float) fuzziness).floatValue();
- } else if (fuzziness instanceof Integer) { // it's an edit!
- int dist = Math.min(((Integer) fuzziness).intValue(),
- LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
- return Math.min(0.999f, Math.max(0.0f, 1.0f - ((float) dist / (float) termLen(text))));
- } else {
- final float similarity = Float.parseFloat(fuzziness.toString());
- if (similarity >= 0.0f && similarity < 1.0f) {
- return similarity;
- }
- }
- throw new IllegalArgumentException("Can't get similarity from fuzziness [" + fuzziness + "]");
- }
-
private int termLen(String text) {
return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english
}
diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
index 752037b9b21..2a21d3d4595 100644
--- a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
+++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java
@@ -179,7 +179,7 @@ public class QueryStringQueryParser implements QueryParser {
} else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
qpSettings.phraseSlop(parser.intValue());
} else if (parseContext.parseFieldMatcher().match(currentFieldName, FUZZINESS)) {
- qpSettings.fuzzyMinSim(Fuzziness.parse(parser).asSimilarity());
+ qpSettings.setFuzziness(Fuzziness.parse(parser));
} else if ("boost".equals(currentFieldName)) {
qpSettings.boost(parser.floatValue());
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
diff --git a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java
index 74ed24a5ec4..dc3c66b4e83 100644
--- a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java
+++ b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java
@@ -18,7 +18,6 @@
*/
package org.elasticsearch.common.unit;
-import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.common.xcontent.XContent;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
@@ -143,16 +142,6 @@ public class FuzzinessTests extends ElasticsearchTestCase {
public void testAuto() {
final int codePoints = randomIntBetween(0, 10);
String string = randomRealisticUnicodeOfCodepointLength(codePoints);
- if (codePoints <= 2) {
- assertThat(Fuzziness.AUTO.asDistance(string), equalTo(0));
- assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(0));
- } else if (codePoints > 5) {
- assertThat(Fuzziness.AUTO.asDistance(string), equalTo(2));
- assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(2));
- } else {
- assertThat(Fuzziness.AUTO.asDistance(string), equalTo(1));
- assertThat(Fuzziness.fromSimilarity(Fuzziness.AUTO.asSimilarity(string)).asDistance(string), equalTo(1));
- }
assertThat(Fuzziness.AUTO.asByte(), equalTo((byte) 1));
assertThat(Fuzziness.AUTO.asInt(), equalTo(1));
assertThat(Fuzziness.AUTO.asFloat(), equalTo(1f));
@@ -173,28 +162,4 @@ public class FuzzinessTests extends ElasticsearchTestCase {
}
}
- @Test
- @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/10638")
- public void testSimilarityToDistance() {
- assertThat(Fuzziness.fromSimilarity(0.5f).asDistance("ab"), equalTo(1));
- assertThat(Fuzziness.fromSimilarity(0.66f).asDistance("abcefg"), equalTo(2));
- assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("ab"), equalTo(0));
- assertThat(Fuzziness.fromSimilarity(0.8f).asDistance("abcefg"), equalTo(1));
- assertThat((double) Fuzziness.ONE.asSimilarity("abcefg"), closeTo(0.8f, 0.05));
- assertThat((double) Fuzziness.TWO.asSimilarity("abcefg"), closeTo(0.66f, 0.05));
- assertThat((double) Fuzziness.ONE.asSimilarity("ab"), closeTo(0.5f, 0.05));
-
- int iters = randomIntBetween(100, 1000);
- for (int i = 0; i < iters; i++) {
- Fuzziness fuzziness = Fuzziness.fromEdits(between(1, 2));
- String string = rarely() ? randomRealisticUnicodeOfLengthBetween(2, 4) :
- randomRealisticUnicodeOfLengthBetween(4, 10);
- float similarity = fuzziness.asSimilarity(string);
- if (similarity != 0.0f) {
- Fuzziness similarityBased = Fuzziness.build(similarity);
- assertThat((double) similarityBased.asSimilarity(string), closeTo(similarity, 0.05));
- assertThat(similarityBased.asDistance(string), equalTo(Math.min(2, fuzziness.asDistance(string))));
- }
- }
- }
}
diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java
index e8c73dfd0a9..b89c330dde8 100644
--- a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java
+++ b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java
@@ -437,7 +437,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
@Test
public void testFuzzyQueryWithFieldsBuilder() throws IOException {
IndexQueryParserService queryParser = queryParser();
- Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.fromSimilarity(0.1f)).prefixLength(1).boost(2.0f).buildAsBytes()).query();
+ Query parsedQuery = queryParser.parse(fuzzyQuery("name.first", "sh").fuzziness(Fuzziness.ONE).prefixLength(1).boost(2.0f).buildAsBytes()).query();
assertThat(parsedQuery, instanceOf(FuzzyQuery.class));
FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery;
assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh")));
@@ -454,7 +454,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
assertThat(parsedQuery, instanceOf(FuzzyQuery.class));
FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery;
assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh")));
- assertThat(fuzzyQuery.getMaxEdits(), equalTo(FuzzyQuery.floatToEdits(0.1f, "sh".length())));
+ assertThat(fuzzyQuery.getMaxEdits(), equalTo(Fuzziness.AUTO.asDistance("sh")));
assertThat(fuzzyQuery.getPrefixLength(), equalTo(1));
assertThat(fuzzyQuery.getBoost(), equalTo(2.0f));
}
diff --git a/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json b/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json
index 3e3d30ffdc0..7636496adc4 100644
--- a/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json
+++ b/core/src/test/java/org/elasticsearch/index/query/fuzzy-with-fields.json
@@ -2,7 +2,7 @@
"fuzzy":{
"name.first":{
"value":"sh",
- "fuzziness":0.1,
+ "fuzziness": "AUTO",
"prefix_length":1,
"boost":2.0
}
diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc
index 7dfb3936e35..d54e93a9dc0 100644
--- a/docs/reference/api-conventions.asciidoc
+++ b/docs/reference/api-conventions.asciidoc
@@ -331,15 +331,6 @@ generates an edit distance based on the length of the term. For lengths:
`>5`:: two edits allowed
`AUTO` should generally be the preferred value for `fuzziness`.
---
-
-`0.0..1.0`::
-
-converted into an edit distance using the formula: `length(term) * (1.0 -
-fuzziness)`, eg a `fuzziness` of `0.6` with a term of length 10 would result
-in an edit distance of `4`. Note: in all APIs the maximum allowed edit distance is `2`.
-
-
[float]
=== Result Casing