Deprecate `levenstein` in favor of `levenshtein` (#27409)
Support both spellings thoughout 6.x, reporting the incorrect one as deprecated.
This commit is contained in:
parent
fadbe0de08
commit
fd564b10db
|
@ -31,6 +31,8 @@ import org.elasticsearch.ExceptionsHelper;
|
|||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
|
@ -45,6 +47,9 @@ import java.util.function.Consumer;
|
|||
|
||||
public final class DirectCandidateGeneratorBuilder implements CandidateGenerator {
|
||||
|
||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
|
||||
Loggers.getLogger(DirectCandidateGeneratorBuilder.class));
|
||||
|
||||
private static final String TYPE = "direct_generator";
|
||||
|
||||
public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
|
||||
|
@ -211,8 +216,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
* string distance for terms inside the index.
|
||||
* <li><code>damerau_levenshtein</code> - String distance algorithm
|
||||
* based on Damerau-Levenshtein algorithm.
|
||||
* <li><code>levenstein</code> - String distance algorithm based on
|
||||
* Levenstein edit distance algorithm.
|
||||
* <li><code>levenshtein</code> - String distance algorithm based on
|
||||
* Levenshtein edit distance algorithm.
|
||||
* <li><code>jarowinkler</code> - String distance algorithm based on
|
||||
* Jaro-Winkler algorithm.
|
||||
* <li><code>ngram</code> - String distance algorithm based on character
|
||||
|
@ -458,13 +463,16 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
|
|||
}
|
||||
}
|
||||
|
||||
private static StringDistance resolveDistance(String distanceVal) {
|
||||
static StringDistance resolveDistance(String distanceVal) {
|
||||
distanceVal = distanceVal.toLowerCase(Locale.US);
|
||||
if ("internal".equals(distanceVal)) {
|
||||
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
|
||||
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
|
||||
return new LuceneLevenshteinDistance();
|
||||
} else if ("levenstein".equals(distanceVal)) {
|
||||
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
|
||||
return new LevensteinDistance();
|
||||
} else if ("levenshtein".equals(distanceVal)) {
|
||||
return new LevensteinDistance();
|
||||
// TODO Jaro and Winkler are 2 people - so apply same naming logic
|
||||
// as damerau_levenshtein
|
||||
|
|
|
@ -30,6 +30,8 @@ import org.elasticsearch.common.ParsingException;
|
|||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
|
@ -66,6 +68,9 @@ import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBu
|
|||
* global options, but are only applicable for this suggestion.
|
||||
*/
|
||||
public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuilder> {
|
||||
|
||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermSuggestionBuilder.class));
|
||||
|
||||
private static final String SUGGESTION_NAME = "term";
|
||||
|
||||
private SuggestMode suggestMode = SuggestMode.MISSING;
|
||||
|
@ -214,8 +219,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
* string distance for terms inside the index.
|
||||
* <li><code>damerau_levenshtein</code> - String distance algorithm based on
|
||||
* Damerau-Levenshtein algorithm.
|
||||
* <li><code>levenstein</code> - String distance algorithm based on
|
||||
* Levenstein edit distance algorithm.
|
||||
* <li><code>levenshtein</code> - String distance algorithm based on
|
||||
* Levenshtein edit distance algorithm.
|
||||
* <li><code>jarowinkler</code> - String distance algorithm based on
|
||||
* Jaro-Winkler algorithm.
|
||||
* <li><code>ngram</code> - String distance algorithm based on character
|
||||
|
@ -543,8 +548,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
return new LuceneLevenshteinDistance();
|
||||
}
|
||||
},
|
||||
/** String distance algorithm based on Levenstein edit distance algorithm. */
|
||||
LEVENSTEIN {
|
||||
/** String distance algorithm based on Levenshtein edit distance algorithm. */
|
||||
LEVENSHTEIN {
|
||||
@Override
|
||||
public StringDistance toLucene() {
|
||||
return new LevensteinDistance();
|
||||
|
@ -584,7 +589,10 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
|
|||
case "damerauLevenshtein":
|
||||
return DAMERAU_LEVENSHTEIN;
|
||||
case "levenstein":
|
||||
return LEVENSTEIN;
|
||||
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
|
||||
return LEVENSHTEIN;
|
||||
case "levenshtein":
|
||||
return LEVENSHTEIN;
|
||||
case "ngram":
|
||||
return NGRAM;
|
||||
case "jarowinkler":
|
||||
|
|
|
@ -19,6 +19,11 @@
|
|||
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.search.spell.DirectSpellChecker;
|
||||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
|
||||
import org.apache.lucene.search.spell.NGramDistance;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
|
@ -38,6 +43,8 @@ import java.util.List;
|
|||
import java.util.function.Supplier;
|
||||
|
||||
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.core.IsInstanceOf.instanceOf;
|
||||
|
||||
public class DirectCandidateGeneratorTests extends ESTestCase {
|
||||
private static final int NUMBER_OF_RUNS = 20;
|
||||
|
@ -65,6 +72,22 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testFromString() {
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));
|
||||
|
||||
expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
|
||||
expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null));
|
||||
}
|
||||
|
||||
public void testLevensteinDeprecation() {
|
||||
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), instanceOf(LevensteinDistance.class));
|
||||
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
|
||||
}
|
||||
|
||||
private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
|
||||
DirectCandidateGeneratorBuilder mutation = copy(original);
|
||||
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
|
||||
|
@ -89,7 +112,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
|
|||
mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other"));
|
||||
mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other"));
|
||||
mutators.add(
|
||||
() -> mutation.stringDistance(original.stringDistance() == null ? "levenstein" : original.stringDistance() + "_other"));
|
||||
() -> mutation.stringDistance(original.stringDistance() == null ? "levenshtein" : original.stringDistance() + "_other"));
|
||||
mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other"));
|
||||
return randomFrom(mutators).get();
|
||||
}
|
||||
|
@ -189,7 +212,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
|
|||
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
|
||||
maybeSet(generator::size, randomIntBetween(1, 20));
|
||||
maybeSet(generator::sort, randomFrom("score", "frequency"));
|
||||
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram"));
|
||||
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
|
||||
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
|
||||
return generator;
|
||||
}
|
||||
|
|
|
@ -20,10 +20,10 @@
|
|||
package org.elasticsearch.search.suggest.term;
|
||||
|
||||
import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
/**
|
||||
|
@ -38,7 +38,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
|
|||
public void testValidOrdinals() {
|
||||
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
|
||||
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
|
||||
assertThat(StringDistanceImpl.LEVENSTEIN.ordinal(), equalTo(2));
|
||||
assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
|
||||
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
|
||||
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
|
||||
}
|
||||
|
@ -47,28 +47,27 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
|
|||
public void testFromString() {
|
||||
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
|
||||
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
|
||||
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSTEIN));
|
||||
assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
|
||||
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
|
||||
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));
|
||||
|
||||
final String doesntExist = "doesnt_exist";
|
||||
try {
|
||||
StringDistanceImpl.resolve(doesntExist);
|
||||
fail("StringDistanceImpl should not have an element " + doesntExist);
|
||||
} catch (IllegalArgumentException e) {
|
||||
}
|
||||
try {
|
||||
StringDistanceImpl.resolve(null);
|
||||
fail("StringDistanceImpl.resolve on a null value should throw an exception.");
|
||||
} catch (NullPointerException e) {
|
||||
expectThrows(IllegalArgumentException.class, () -> StringDistanceImpl.resolve(doesntExist));
|
||||
|
||||
NullPointerException e = expectThrows(NullPointerException.class, () -> StringDistanceImpl.resolve(null));
|
||||
assertThat(e.getMessage(), equalTo("Input string is null"));
|
||||
}
|
||||
|
||||
public void testLevensteinDeprecation() {
|
||||
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
|
||||
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testWriteTo() throws IOException {
|
||||
assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
|
||||
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
|
||||
assertWriteToStream(StringDistanceImpl.LEVENSTEIN, 2);
|
||||
assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
|
||||
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
|
||||
assertWriteToStream(StringDistanceImpl.NGRAM, 4);
|
||||
}
|
||||
|
@ -77,7 +76,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
|
|||
public void testReadFrom() throws IOException {
|
||||
assertReadFromStream(0, StringDistanceImpl.INTERNAL);
|
||||
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
|
||||
assertReadFromStream(2, StringDistanceImpl.LEVENSTEIN);
|
||||
assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
|
||||
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
|
||||
assertReadFromStream(4, StringDistanceImpl.NGRAM);
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ public class TermSuggestionBuilderTests extends AbstractSuggestionBuilderTestCas
|
|||
switch (randomVal) {
|
||||
case 0: return StringDistanceImpl.INTERNAL;
|
||||
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
|
||||
case 2: return StringDistanceImpl.LEVENSTEIN;
|
||||
case 2: return StringDistanceImpl.LEVENSHTEIN;
|
||||
case 3: return StringDistanceImpl.JAROWINKLER;
|
||||
case 4: return StringDistanceImpl.NGRAM;
|
||||
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);
|
||||
|
|
|
@ -116,7 +116,7 @@ doesn't take the query into account that is part of request.
|
|||
for comparing string distance for terms inside the index.
|
||||
`damerau_levenshtein` - String distance algorithm based on
|
||||
Damerau-Levenshtein algorithm.
|
||||
`levenstein` - String distance algorithm based on Levenstein edit distance
|
||||
`levenshtein` - String distance algorithm based on Levenshtein edit distance
|
||||
algorithm.
|
||||
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
|
||||
`ngram` - String distance algorithm based on character n-grams.
|
||||
|
|
Loading…
Reference in New Issue