Deprecate `levenstein` in favor of `levenshtein` (#27409)

Support both spellings thoughout 6.x, reporting the incorrect one as deprecated.
This commit is contained in:
olcbean 2017-11-23 13:53:47 +01:00 committed by David Turner
parent fadbe0de08
commit fd564b10db
6 changed files with 66 additions and 28 deletions

View File

@ -31,6 +31,8 @@ import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -45,6 +47,9 @@ import java.util.function.Consumer;
public final class DirectCandidateGeneratorBuilder implements CandidateGenerator {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
Loggers.getLogger(DirectCandidateGeneratorBuilder.class));
private static final String TYPE = "direct_generator";
public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
@ -211,8 +216,8 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
* string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm
* based on Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on
* Levenstein edit distance algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
@ -458,13 +463,16 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
}
}
private static StringDistance resolveDistance(String distanceVal) {
static StringDistance resolveDistance(String distanceVal) {
distanceVal = distanceVal.toLowerCase(Locale.US);
if ("internal".equals(distanceVal)) {
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
return new LuceneLevenshteinDistance();
} else if ("levenstein".equals(distanceVal)) {
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return new LevensteinDistance();
} else if ("levenshtein".equals(distanceVal)) {
return new LevensteinDistance();
// TODO Jaro and Winkler are 2 people - so apply same naming logic
// as damerau_levenshtein

View File

@ -30,6 +30,8 @@ import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryShardContext;
@ -66,6 +68,9 @@ import static org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBu
* global options, but are only applicable for this suggestion.
*/
public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuilder> {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermSuggestionBuilder.class));
private static final String SUGGESTION_NAME = "term";
private SuggestMode suggestMode = SuggestMode.MISSING;
@ -214,8 +219,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
* string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm based on
* Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on
* Levenstein edit distance algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
@ -543,8 +548,8 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
return new LuceneLevenshteinDistance();
}
},
/** String distance algorithm based on Levenstein edit distance algorithm. */
LEVENSTEIN {
/** String distance algorithm based on Levenshtein edit distance algorithm. */
LEVENSHTEIN {
@Override
public StringDistance toLucene() {
return new LevensteinDistance();
@ -584,7 +589,10 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
case "damerauLevenshtein":
return DAMERAU_LEVENSHTEIN;
case "levenstein":
return LEVENSTEIN;
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return LEVENSHTEIN;
case "levenshtein":
return LEVENSHTEIN;
case "ngram":
return NGRAM;
case "jarowinkler":

View File

@ -19,6 +19,11 @@
package org.elasticsearch.search.suggest.phrase;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.xcontent.ToXContent;
@ -38,6 +43,8 @@ import java.util.List;
import java.util.function.Supplier;
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
public class DirectCandidateGeneratorTests extends ESTestCase {
private static final int NUMBER_OF_RUNS = 20;
@ -65,6 +72,22 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
}
}
public void testFromString() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));
expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null));
}
public void testLevensteinDeprecation() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), instanceOf(LevensteinDistance.class));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}
private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
DirectCandidateGeneratorBuilder mutation = copy(original);
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
@ -89,7 +112,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other"));
mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other"));
mutators.add(
() -> mutation.stringDistance(original.stringDistance() == null ? "levenstein" : original.stringDistance() + "_other"));
() -> mutation.stringDistance(original.stringDistance() == null ? "levenshtein" : original.stringDistance() + "_other"));
mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other"));
return randomFrom(mutators).get();
}
@ -189,7 +212,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
maybeSet(generator::size, randomIntBetween(1, 20));
maybeSet(generator::sort, randomFrom("score", "frequency"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
return generator;
}

View File

@ -20,10 +20,10 @@
package org.elasticsearch.search.suggest.term;
import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
import java.io.IOException;
import static org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
import static org.hamcrest.Matchers.equalTo;
/**
@ -38,7 +38,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testValidOrdinals() {
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
assertThat(StringDistanceImpl.LEVENSTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
}
@ -47,28 +47,27 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testFromString() {
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSTEIN));
assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));
final String doesntExist = "doesnt_exist";
try {
StringDistanceImpl.resolve(doesntExist);
fail("StringDistanceImpl should not have an element " + doesntExist);
} catch (IllegalArgumentException e) {
}
try {
StringDistanceImpl.resolve(null);
fail("StringDistanceImpl.resolve on a null value should throw an exception.");
} catch (NullPointerException e) {
expectThrows(IllegalArgumentException.class, () -> StringDistanceImpl.resolve(doesntExist));
NullPointerException e = expectThrows(NullPointerException.class, () -> StringDistanceImpl.resolve(null));
assertThat(e.getMessage(), equalTo("Input string is null"));
}
public void testLevensteinDeprecation() {
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}
@Override
public void testWriteTo() throws IOException {
assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
assertWriteToStream(StringDistanceImpl.LEVENSTEIN, 2);
assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
assertWriteToStream(StringDistanceImpl.NGRAM, 4);
}
@ -77,7 +76,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
public void testReadFrom() throws IOException {
assertReadFromStream(0, StringDistanceImpl.INTERNAL);
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
assertReadFromStream(4, StringDistanceImpl.NGRAM);
}

View File

@ -99,7 +99,7 @@ public class TermSuggestionBuilderTests extends AbstractSuggestionBuilderTestCas
switch (randomVal) {
case 0: return StringDistanceImpl.INTERNAL;
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
case 2: return StringDistanceImpl.LEVENSTEIN;
case 2: return StringDistanceImpl.LEVENSHTEIN;
case 3: return StringDistanceImpl.JAROWINKLER;
case 4: return StringDistanceImpl.NGRAM;
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);

View File

@ -116,7 +116,7 @@ doesn't take the query into account that is part of request.
for comparing string distance for terms inside the index.
`damerau_levenshtein` - String distance algorithm based on
Damerau-Levenshtein algorithm.
`levenstein` - String distance algorithm based on Levenstein edit distance
`levenshtein` - String distance algorithm based on Levenshtein edit distance
algorithm.
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
`ngram` - String distance algorithm based on character n-grams.