Deprecate `levenstein` in favor of `levenshtein` (#27409)

Support both spellings thoughout 6.x, reporting the incorrect one as deprecated.
This commit is contained in:
olcbean 2017-11-23 13:53:47 +01:00 committed by David Turner
parent fadbe0de08
commit fd564b10db
6 changed files with 66 additions and 28 deletions

View File

@ -31,6 +31,8 @@ import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
@ -45,6 +47,9 @@ import java.util.function.Consumer;
public final class DirectCandidateGeneratorBuilder implements CandidateGenerator { public final class DirectCandidateGeneratorBuilder implements CandidateGenerator {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
Loggers.getLogger(DirectCandidateGeneratorBuilder.class));
private static final String TYPE = "direct_generator"; private static final String TYPE = "direct_generator";
public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE); public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
@ -211,8 +216,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
* string distance for terms inside the index. * string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm * <li><code>damerau_levenshtein</code> - String distance algorithm
* based on Damerau-Levenshtein algorithm. * based on Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on * <li><code>levenshtein</code> - String distance algorithm based on
* Levenstein edit distance algorithm. * Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on * <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm. * Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character * <li><code>ngram</code> - String distance algorithm based on character
@ -458,13 +463,16 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
} }
} }
private static StringDistance resolveDistance(String distanceVal) { static StringDistance resolveDistance(String distanceVal) {
distanceVal = distanceVal.toLowerCase(Locale.US); distanceVal = distanceVal.toLowerCase(Locale.US);
if ("internal".equals(distanceVal)) { if ("internal".equals(distanceVal)) {
return DirectSpellChecker.INTERNAL_LEVENSHTEIN; return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
return new LuceneLevenshteinDistance(); return new LuceneLevenshteinDistance();
} else if ("levenstein".equals(distanceVal)) { } else if ("levenstein".equals(distanceVal)) {
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return new LevensteinDistance();
} else if ("levenshtein".equals(distanceVal)) {
return new LevensteinDistance(); return new LevensteinDistance();
// TODO Jaro and Winkler are 2 people - so apply same naming logic // TODO Jaro and Winkler are 2 people - so apply same naming logic
// as damerau_levenshtein // as damerau_levenshtein

View File

@ -30,6 +30,8 @@ import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardContext;
@ -66,6 +68,9 @@ import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBu
* global options, but are only applicable for this suggestion. * global options, but are only applicable for this suggestion.
*/ */
public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuilder> { public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuilder> {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermSuggestionBuilder.class));
private static final String SUGGESTION_NAME = "term"; private static final String SUGGESTION_NAME = "term";
private SuggestMode suggestMode = SuggestMode.MISSING; private SuggestMode suggestMode = SuggestMode.MISSING;
@ -214,8 +219,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
* string distance for terms inside the index. * string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm based on * <li><code>damerau_levenshtein</code> - String distance algorithm based on
* Damerau-Levenshtein algorithm. * Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on * <li><code>levenshtein</code> - String distance algorithm based on
* Levenstein edit distance algorithm. * Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on * <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm. * Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character * <li><code>ngram</code> - String distance algorithm based on character
@ -543,8 +548,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
return new LuceneLevenshteinDistance(); return new LuceneLevenshteinDistance();
} }
}, },
/** String distance algorithm based on Levenstein edit distance algorithm. */ /** String distance algorithm based on Levenshtein edit distance algorithm. */
LEVENSTEIN { LEVENSHTEIN {
@Override @Override
public StringDistance toLucene() { public StringDistance toLucene() {
return new LevensteinDistance(); return new LevensteinDistance();
@ -584,7 +589,10 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
case "damerauLevenshtein": case "damerauLevenshtein":
return DAMERAU_LEVENSHTEIN; return DAMERAU_LEVENSHTEIN;
case "levenstein": case "levenstein":
return LEVENSTEIN; DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return LEVENSHTEIN;
case "levenshtein":
return LEVENSHTEIN;
case "ngram": case "ngram":
return NGRAM; return NGRAM;
case "jarowinkler": case "jarowinkler":

View File

@ -19,6 +19,11 @@
package org.elasticsearch.search.suggest.phrase; package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContent;
@ -38,6 +43,8 @@ import java.util.List;
import java.util.function.Supplier; import java.util.function.Supplier;
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
public class DirectCandidateGeneratorTests extends ESTestCase { public class DirectCandidateGeneratorTests extends ESTestCase {
private static final int NUMBER_OF_RUNS = 20; private static final int NUMBER_OF_RUNS = 20;
@ -65,6 +72,22 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
} }
} }
public void testFromString() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));
expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null));
}
public void testLevensteinDeprecation() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), instanceOf(LevensteinDistance.class));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}
private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException { private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
DirectCandidateGeneratorBuilder mutation = copy(original); DirectCandidateGeneratorBuilder mutation = copy(original);
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>(); List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
@ -89,7 +112,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other")); mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other"));
mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other")); mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other"));
mutators.add( mutators.add(
() -> mutation.stringDistance(original.stringDistance() == null ? "levenstein" : original.stringDistance() + "_other")); () -> mutation.stringDistance(original.stringDistance() == null ? "levenshtein" : original.stringDistance() + "_other"));
mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other")); mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other"));
return randomFrom(mutators).get(); return randomFrom(mutators).get();
} }
@ -189,7 +212,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20)); maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
maybeSet(generator::size, randomIntBetween(1, 20)); maybeSet(generator::size, randomIntBetween(1, 20));
maybeSet(generator::sort, randomFrom("score", "frequency")); maybeSet(generator::sort, randomFrom("score", "frequency"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram")); maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always")); maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
return generator; return generator;
} }

View File

@ -20,10 +20,10 @@
package org.elasticsearch.search.suggest.term; package org.elasticsearch.search.suggest.term;
import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase; import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
import java.io.IOException; import java.io.IOException;
import static org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
/** /**
@ -38,7 +38,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testValidOrdinals() { public void testValidOrdinals() {
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0)); assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1)); assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
assertThat(StringDistanceImpl.LEVENSTEIN.ordinal(), equalTo(2)); assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3)); assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4)); assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
} }
@ -47,28 +47,27 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testFromString() { public void testFromString() {
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL)); assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN)); assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSTEIN)); assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER)); assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM)); assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));
final String doesntExist = "doesnt_exist"; final String doesntExist = "doesnt_exist";
try { expectThrows(IllegalArgumentException.class, () -> StringDistanceImpl.resolve(doesntExist));
StringDistanceImpl.resolve(doesntExist);
fail("StringDistanceImpl should not have an element " + doesntExist); NullPointerException e = expectThrows(NullPointerException.class, () -> StringDistanceImpl.resolve(null));
} catch (IllegalArgumentException e) {
}
try {
StringDistanceImpl.resolve(null);
fail("StringDistanceImpl.resolve on a null value should throw an exception.");
} catch (NullPointerException e) {
assertThat(e.getMessage(), equalTo("Input string is null")); assertThat(e.getMessage(), equalTo("Input string is null"));
} }
public void testLevensteinDeprecation() {
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
} }
@Override @Override
public void testWriteTo() throws IOException { public void testWriteTo() throws IOException {
assertWriteToStream(StringDistanceImpl.INTERNAL, 0); assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1); assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
assertWriteToStream(StringDistanceImpl.LEVENSTEIN, 2); assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3); assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
assertWriteToStream(StringDistanceImpl.NGRAM, 4); assertWriteToStream(StringDistanceImpl.NGRAM, 4);
} }
@ -77,7 +76,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testReadFrom() throws IOException { public void testReadFrom() throws IOException {
assertReadFromStream(0, StringDistanceImpl.INTERNAL); assertReadFromStream(0, StringDistanceImpl.INTERNAL);
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN); assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSTEIN); assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER); assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
assertReadFromStream(4, StringDistanceImpl.NGRAM); assertReadFromStream(4, StringDistanceImpl.NGRAM);
} }

View File

@ -99,7 +99,7 @@ public class TermSuggestionBuilderTests extends AbstractSuggestionBuilderTestCas
switch (randomVal) { switch (randomVal) {
case 0: return StringDistanceImpl.INTERNAL; case 0: return StringDistanceImpl.INTERNAL;
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN; case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
case 2: return StringDistanceImpl.LEVENSTEIN; case 2: return StringDistanceImpl.LEVENSHTEIN;
case 3: return StringDistanceImpl.JAROWINKLER; case 3: return StringDistanceImpl.JAROWINKLER;
case 4: return StringDistanceImpl.NGRAM; case 4: return StringDistanceImpl.NGRAM;
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal); default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);

View File

@ -116,7 +116,7 @@ doesn't take the query into account that is part of request.
for comparing string distance for terms inside the index. for comparing string distance for terms inside the index.
`damerau_levenshtein` - String distance algorithm based on `damerau_levenshtein` - String distance algorithm based on
Damerau-Levenshtein algorithm. Damerau-Levenshtein algorithm.
`levenstein` - String distance algorithm based on Levenstein edit distance `levenshtein` - String distance algorithm based on Levenshtein edit distance
algorithm. algorithm.
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm. `jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
`ngram` - String distance algorithm based on character n-grams. `ngram` - String distance algorithm based on character n-grams.