Deprecate `jarowinkler` in favor of `jaro_winkler` (#27526)

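Jaro and Winkler are two people, so the option name should separate them with an underscore, following the same naming convention as `damerau_levenshtein`. The old `jarowinkler` spelling is still accepted, but now logs a deprecation warning pointing to `jaro_winkler`.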
This commit is contained in:
olcbean 2017-11-30 13:49:34 +01:00 committed by David Turner
parent 41f73e0acf
commit d25c9671de
6 changed files with 29 additions and 14 deletions

View File

@ -218,7 +218,7 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
* based on Damerau-Levenshtein algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* <li><code>jaro_winkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
* n-grams.
@ -474,9 +474,10 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator
return new LevensteinDistance();
} else if ("levenshtein".equals(distanceVal)) {
return new LevensteinDistance();
// TODO Jaro and Winkler are 2 people - so apply same naming logic
// as damerau_levenshtein
} else if ("jarowinkler".equals(distanceVal)) {
DEPRECATION_LOGGER.deprecated("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
return new JaroWinklerDistance();
} else if ("jaro_winkler".equals(distanceVal)) {
return new JaroWinklerDistance();
} else if ("ngram".equals(distanceVal)) {
return new NGramDistance();

View File

@ -221,7 +221,7 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
* Damerau-Levenshtein algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* <li><code>jaro_winkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
* n-grams.
@ -556,7 +556,7 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
}
},
/** String distance algorithm based on Jaro-Winkler algorithm. */
JAROWINKLER {
JARO_WINKLER {
@Override
public StringDistance toLucene() {
return new JaroWinklerDistance();
@ -596,7 +596,10 @@ public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuild
case "ngram":
return NGRAM;
case "jarowinkler":
return JAROWINKLER;
DEPRECATION_LOGGER.deprecated("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
return JARO_WINKLER;
case "jaro_winkler":
return JARO_WINKLER;
default: throw new IllegalArgumentException("Illegal distance option " + str);
}
}

View File

@ -76,7 +76,7 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaro_winkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));
expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
@ -88,6 +88,11 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}
/**
 * Resolving the legacy distance name must still produce a
 * {@link JaroWinklerDistance} and must emit the deprecation warning that
 * points users at {@code jaro_winkler}.
 */
public void testJaroWinklerDeprecation() {
    // Mixed-case legacy spelling; the expected warning text uses the lower-case form.
    final String legacyName = "jaroWinkler";
    final String expectedWarning = "Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]";

    assertThat(DirectCandidateGeneratorBuilder.resolveDistance(legacyName), instanceOf(JaroWinklerDistance.class));
    assertWarnings(expectedWarning);
}
private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
DirectCandidateGeneratorBuilder mutation = copy(original);
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
@ -212,7 +217,8 @@ public class DirectCandidateGeneratorTests extends ESTestCase {
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
maybeSet(generator::size, randomIntBetween(1, 20));
maybeSet(generator::sort, randomFrom("score", "frequency"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
maybeSet(generator::stringDistance,
randomFrom("internal", "damerau_levenshtein", "levenshtein", "jaro_winkler", "ngram"));
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
return generator;
}

View File

@ -39,7 +39,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.JARO_WINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
}
@ -48,7 +48,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
assertThat(StringDistanceImpl.resolve("jaro_winkler"), equalTo(StringDistanceImpl.JARO_WINKLER));
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));
final String doesntExist = "doesnt_exist";
@ -63,12 +63,17 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}
/**
 * Resolving the legacy distance name must still map to
 * {@code StringDistanceImpl.JARO_WINKLER} and must emit the deprecation
 * warning that points users at {@code jaro_winkler}.
 */
public void testJaroWinklerDeprecation() {
    // Mixed-case legacy spelling; the expected warning text uses the lower-case form.
    final String legacyName = "jaroWinkler";
    final String expectedWarning = "Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]";

    assertThat(StringDistanceImpl.resolve(legacyName), equalTo(StringDistanceImpl.JARO_WINKLER));
    assertWarnings(expectedWarning);
}
/**
 * Checks, via {@code assertWriteToStream}, each {@code StringDistanceImpl}
 * constant against the integer it is expected to be written as (0 through 4).
 *
 * NOTE(review): this span comes from a rendered diff, so it shows both the
 * pre-rename {@code JAROWINKLER} line and its {@code JARO_WINKLER}
 * replacement for value 3; only the latter is expected to exist after the
 * change - confirm against the actual file.
 */
@Override
public void testWriteTo() throws IOException {
assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
// Pre-rename constant name (removed side of the diff).
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
// Post-rename constant name (added side of the diff); same stream value.
assertWriteToStream(StringDistanceImpl.JARO_WINKLER, 3);
assertWriteToStream(StringDistanceImpl.NGRAM, 4);
}
@ -77,7 +82,7 @@ public class StringDistanceImplTests extends AbstractWriteableEnumTestCase {
assertReadFromStream(0, StringDistanceImpl.INTERNAL);
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
assertReadFromStream(3, StringDistanceImpl.JARO_WINKLER);
assertReadFromStream(4, StringDistanceImpl.NGRAM);
}

View File

@ -100,7 +100,7 @@ public class TermSuggestionBuilderTests extends AbstractSuggestionBuilderTestCas
case 0: return StringDistanceImpl.INTERNAL;
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
case 2: return StringDistanceImpl.LEVENSHTEIN;
case 3: return StringDistanceImpl.JAROWINKLER;
case 3: return StringDistanceImpl.JARO_WINKLER;
case 4: return StringDistanceImpl.NGRAM;
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);
}

View File

@ -118,5 +118,5 @@ doesn't take the query into account that is part of request.
Damerau-Levenshtein algorithm.
`levenshtein` - String distance algorithm based on Levenshtein edit distance
algorithm.
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
`jaro_winkler` - String distance algorithm based on Jaro-Winkler algorithm.
`ngram` - String distance algorithm based on character n-grams.