diff --git a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java index 100ddefd99d..179870f8653 100644 --- a/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java +++ b/core/src/main/java/org/elasticsearch/common/unit/Fuzziness.java @@ -18,6 +18,8 @@ */ package org.elasticsearch.common.unit; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -43,8 +45,12 @@ public final class Fuzziness implements ToXContentFragment, Writeable { public static final Fuzziness TWO = new Fuzziness(2); public static final Fuzziness AUTO = new Fuzziness("AUTO"); public static final ParseField FIELD = new ParseField(X_FIELD_NAME); + private static final int DEFAULT_LOW_DISTANCE = 3; + private static final int DEFAULT_HIGH_DISTANCE = 6; private final String fuzziness; + private int lowDistance = DEFAULT_LOW_DISTANCE; + private int highDistance = DEFAULT_HIGH_DISTANCE; private Fuzziness(int fuzziness) { if (fuzziness != 0 && fuzziness != 1 && fuzziness != 2) { @@ -54,22 +60,48 @@ public final class Fuzziness implements ToXContentFragment, Writeable { } private Fuzziness(String fuzziness) { - if (fuzziness == null) { + if (fuzziness == null || fuzziness.isEmpty()) { throw new IllegalArgumentException("fuzziness can't be null!"); } this.fuzziness = fuzziness.toUpperCase(Locale.ROOT); } + private Fuzziness(String fuzziness, int lowDistance, int highDistance) { + this(fuzziness); + if (lowDistance < 0 || highDistance < 0 || lowDistance > highDistance) { + throw new IllegalArgumentException("fuzziness wrongly configured, must be: lowDistance >= 0, highDistance" + + " >= 0 and lowDistance <= highDistance "); + } + this.lowDistance = lowDistance; + this.highDistance = highDistance; + } + /** * Read from a stream. 
*/ public Fuzziness(StreamInput in) throws IOException { fuzziness = in.readString(); + if (in.getVersion().onOrAfter(Version.V_6_1_0) && in.readBoolean()) { + lowDistance = in.readVInt(); + highDistance = in.readVInt(); + } } @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(fuzziness); + if (out.getVersion().onOrAfter(Version.V_6_1_0)) { + // Nodes before 6.1.0 do not know about custom low/high bounds, so they are only written to newer nodes. + // Dropping them for older nodes is a best-effort to not fail queries in case the cluster is being upgraded + // and users start using features that are not available on all nodes. + if (isAutoWithCustomValues()) { + out.writeBoolean(true); + out.writeVInt(lowDistance); + out.writeVInt(highDistance); + } else { + out.writeBoolean(false); + } + } } /** @@ -88,10 +120,29 @@ public final class Fuzziness implements ToXContentFragment, Writeable { String string = fuzziness.toString(); if (AUTO.asString().equalsIgnoreCase(string)) { return AUTO; + } else if (string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) { + return parseCustomAuto(string); } return new Fuzziness(string); } + private static Fuzziness parseCustomAuto( final String string) { + assert string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":"); + String[] fuzzinessLimit = string.substring(AUTO.asString().length() + 1).split(","); + if (fuzzinessLimit.length == 2) { + try { + int lowerLimit = Integer.parseInt(fuzzinessLimit[0]); + int highLimit = Integer.parseInt(fuzzinessLimit[1]); + return new Fuzziness("AUTO", lowerLimit, highLimit); + } catch (NumberFormatException e) { + throw new ElasticsearchParseException("failed to parse [{}] as a \"auto:int,int\"", e, + string); + } + } else { + throw new ElasticsearchParseException("failed to find low and high distance values"); + } + } + public static Fuzziness parse(XContentParser parser) throws IOException { XContentParser.Token token = parser.currentToken(); switch (token) { @@ -100,6 +151,8 @@ 
public final class Fuzziness implements ToXContentFragment, Writeable { final String fuzziness = parser.text(); if (AUTO.asString().equalsIgnoreCase(fuzziness)) { return AUTO; + } else if (fuzziness.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) { + return parseCustomAuto(fuzziness); } try { final int minimumSimilarity = Integer.parseInt(fuzziness); @@ -135,19 +188,19 @@ public final class Fuzziness implements ToXContentFragment, Writeable { public int asDistance(String text) { if (this.equals(AUTO)) { //AUTO final int len = termLen(text); - if (len <= 2) { + if (len < lowDistance) { return 0; - } else if (len > 5) { - return 2; - } else { + } else if (len < highDistance) { return 1; + } else { + return 2; } } return Math.min(2, (int) asFloat()); } public float asFloat() { - if (this.equals(AUTO)) { + if (this.equals(AUTO) || isAutoWithCustomValues()) { return 1f; } return Float.parseFloat(fuzziness.toString()); @@ -158,9 +211,17 @@ public final class Fuzziness implements ToXContentFragment, Writeable { } public String asString() { + if (isAutoWithCustomValues()) { + return fuzziness.toString() + ":" + lowDistance + "," + highDistance; + } return fuzziness.toString(); } + private boolean isAutoWithCustomValues() { + return fuzziness.startsWith("AUTO") && (lowDistance != DEFAULT_LOW_DISTANCE || + highDistance != DEFAULT_HIGH_DISTANCE); + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java index b370250bf9d..87a9441cb25 100644 --- a/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java +++ b/core/src/test/java/org/elasticsearch/common/unit/FuzzinessTests.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.common.unit; +import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.io.stream.BytesStreamOutput; import 
org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -49,8 +50,8 @@ public class FuzzinessTests extends ESTestCase { assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_NUMBER)); - Fuzziness parse = Fuzziness.parse(parser); - assertThat(parse.asFloat(), equalTo(floatValue)); + Fuzziness fuzziness = Fuzziness.parse(parser); + assertThat(fuzziness.asFloat(), equalTo(floatValue)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); } { @@ -67,21 +68,21 @@ public class FuzzinessTests extends ESTestCase { assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); assertThat(parser.nextToken(), anyOf(equalTo(XContentParser.Token.VALUE_NUMBER), equalTo(XContentParser.Token.VALUE_STRING))); - Fuzziness parse = Fuzziness.parse(parser); + Fuzziness fuzziness = Fuzziness.parse(parser); if (value.intValue() >= 1) { - assertThat(parse.asDistance(), equalTo(Math.min(2, value.intValue()))); + assertThat(fuzziness.asDistance(), equalTo(Math.min(2, value.intValue()))); } assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); if (intValue.equals(value)) { switch (intValue) { case 1: - assertThat(parse, sameInstance(Fuzziness.ONE)); + assertThat(fuzziness, sameInstance(Fuzziness.ONE)); break; case 2: - assertThat(parse, sameInstance(Fuzziness.TWO)); + assertThat(fuzziness, sameInstance(Fuzziness.TWO)); break; case 0: - assertThat(parse, sameInstance(Fuzziness.ZERO)); + assertThat(fuzziness, sameInstance(Fuzziness.ZERO)); break; default: break; @@ -90,19 +91,26 @@ public class FuzzinessTests extends ESTestCase { } { XContentBuilder json; - if (randomBoolean()) { + boolean isDefaultAutoFuzzinessTested = randomBoolean(); + if 
(isDefaultAutoFuzzinessTested) { json = Fuzziness.AUTO.toXContent(jsonBuilder().startObject(), null).endObject(); } else { + String auto = randomBoolean() ? "AUTO" : "auto"; + if (randomBoolean()) { + auto += ":" + randomIntBetween(1, 3) + "," + randomIntBetween(4, 10); + } json = jsonBuilder().startObject() - .field(Fuzziness.X_FIELD_NAME, randomBoolean() ? "AUTO" : "auto") - .endObject(); + .field(Fuzziness.X_FIELD_NAME, auto) + .endObject(); } XContentParser parser = createParser(json); assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); - Fuzziness parse = Fuzziness.parse(parser); - assertThat(parse, sameInstance(Fuzziness.AUTO)); + Fuzziness fuzziness = Fuzziness.parse(parser); + if (isDefaultAutoFuzzinessTested) { + assertThat(fuzziness, sameInstance(Fuzziness.AUTO)); + } assertThat(parser.nextToken(), equalTo(XContentParser.Token.END_OBJECT)); } } @@ -132,13 +140,30 @@ public class FuzzinessTests extends ESTestCase { assertEquals(fuzziness, deserializedFuzziness); } - public void testSerializationAuto() throws IOException { + public void testSerializationDefaultAuto() throws IOException { Fuzziness fuzziness = Fuzziness.AUTO; Fuzziness deserializedFuzziness = doSerializeRoundtrip(fuzziness); assertEquals(fuzziness, deserializedFuzziness); assertEquals(fuzziness.asFloat(), deserializedFuzziness.asFloat(), 0f); } + public void testSerializationCustomAuto() throws IOException { + String auto = "AUTO:4,7"; + XContentBuilder json = jsonBuilder().startObject() + .field(Fuzziness.X_FIELD_NAME, auto) + .endObject(); + + XContentParser parser = createParser(json); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.START_OBJECT)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.FIELD_NAME)); + assertThat(parser.nextToken(), equalTo(XContentParser.Token.VALUE_STRING)); + 
Fuzziness fuzziness = Fuzziness.parse(parser); + + Fuzziness deserializedFuzziness = doSerializeRoundtrip(fuzziness); + assertEquals(fuzziness, deserializedFuzziness); + assertEquals(fuzziness.asString(), deserializedFuzziness.asString()); + } + private static Fuzziness doSerializeRoundtrip(Fuzziness in) throws IOException { BytesStreamOutput output = new BytesStreamOutput(); in.writeTo(output); diff --git a/core/src/test/java/org/elasticsearch/index/query/FuzzyQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/FuzzyQueryBuilderTests.java index 58c70fcfcb3..eaec365b9af 100644 --- a/core/src/test/java/org/elasticsearch/index/query/FuzzyQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/FuzzyQueryBuilderTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; +import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.search.internal.SearchContext; @@ -120,6 +121,92 @@ public class FuzzyQueryBuilderTests extends AbstractQueryTestCase 0); + String query = "{\n" + + " \"fuzzy\":{\n" + + " \"" + STRING_FIELD_NAME + "\":{\n" + + " \"value\":\"sh\",\n" + + " \"fuzziness\": \"AUTO:2,5\",\n" + + " \"prefix_length\":1,\n" + + " \"boost\":2.0\n" + + " }\n" + + " }\n" + + "}"; + Query parsedQuery = parseQuery(query).toQuery(createShardContext()); + assertThat(parsedQuery, instanceOf(BoostQuery.class)); + BoostQuery boostQuery = (BoostQuery) parsedQuery; + assertThat(boostQuery.getBoost(), equalTo(2.0f)); + assertThat(boostQuery.getQuery(), instanceOf(FuzzyQuery.class)); + FuzzyQuery fuzzyQuery = (FuzzyQuery) boostQuery.getQuery(); + assertThat(fuzzyQuery.getTerm(), equalTo(new Term(STRING_FIELD_NAME, "sh"))); + assertThat(fuzzyQuery.getMaxEdits(), equalTo(1)); + 
assertThat(fuzzyQuery.getPrefixLength(), equalTo(1)); + } + + public void testToQueryWithStringFieldDefinedWrongFuzziness() throws IOException { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + String queryMissingFuzzinessUpLimit = "{\n" + + " \"fuzzy\":{\n" + + " \"" + STRING_FIELD_NAME + "\":{\n" + + " \"value\":\"sh\",\n" + + " \"fuzziness\": \"AUTO:2\",\n" + + " \"prefix_length\":1,\n" + + " \"boost\":2.0\n" + + " }\n" + + " }\n" + + "}"; + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, + () -> parseQuery(queryMissingFuzzinessUpLimit).toQuery(createShardContext())); + String msg = "failed to find low and high distance values"; + assertTrue(e.getMessage() + " didn't contain: " + msg + " but: " + e.getMessage(), e.getMessage().contains(msg)); + + String queryHavingNegativeFuzzinessLowLimit = "{\n" + + " \"fuzzy\":{\n" + + " \"" + STRING_FIELD_NAME + "\":{\n" + + " \"value\":\"sh\",\n" + + " \"fuzziness\": \"AUTO:-1,6\",\n" + + " \"prefix_length\":1,\n" + + " \"boost\":2.0\n" + + " }\n" + + " }\n" + + "}"; + String msg2 = "fuzziness wrongly configured"; + IllegalArgumentException e2 = expectThrows(IllegalArgumentException.class, + () -> parseQuery(queryHavingNegativeFuzzinessLowLimit).toQuery(createShardContext())); + assertTrue(e2.getMessage() + " didn't contain: " + msg2 + " but: " + e2.getMessage(), e2.getMessage().contains + (msg2)); + + String queryMissingFuzzinessUpLimit2 = "{\n" + + " \"fuzzy\":{\n" + + " \"" + STRING_FIELD_NAME + "\":{\n" + + " \"value\":\"sh\",\n" + + " \"fuzziness\": \"AUTO:1,\",\n" + + " \"prefix_length\":1,\n" + + " \"boost\":2.0\n" + + " }\n" + + " }\n" + + "}"; + e = expectThrows(ElasticsearchParseException.class, + () -> parseQuery(queryMissingFuzzinessUpLimit2).toQuery(createShardContext())); + assertTrue(e.getMessage() + " didn't contain: " + msg + " but: " + e.getMessage(), e.getMessage().contains(msg)); + + String queryMissingFuzzinessLowLimit = 
"{\n" + + " \"fuzzy\":{\n" + + " \"" + STRING_FIELD_NAME + "\":{\n" + + " \"value\":\"sh\",\n" + + " \"fuzziness\": \"AUTO:,5\",\n" + + " \"prefix_length\":1,\n" + + " \"boost\":2.0\n" + + " }\n" + + " }\n" + + "}"; + e = expectThrows(ElasticsearchParseException.class, + () -> parseQuery(queryMissingFuzzinessLowLimit).toQuery(createShardContext())); + msg = "failed to parse [AUTO:,5] as a \"auto:int,int\""; + assertTrue(e.getMessage() + " didn't contain: " + msg + " but: " + e.getMessage(), e.getMessage().contains(msg)); + } + public void testToQueryWithNumericField() throws IOException { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); String query = "{\n" + diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index b21eac79081..9c4d87fcb27 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -577,7 +577,9 @@ the maximum allowed Levenshtein Edit Distance (or number of edits) `AUTO`:: + -- -generates an edit distance based on the length of the term. For lengths: +generates an edit distance based on the length of the term. +Low and high distance arguments may optionally be provided as `AUTO:[low],[high]`. If not specified, +the default values are 3 and 6, equivalent to `AUTO:3,6`, which gives for lengths: `0..2`:: must match exactly `3..5`:: one edit allowed