Make locale parsing less lenient. (#26361)

The `locale` field of `date` fields accepts almost any string and unknown
locales are simply ignored, which is trappy. We should fail on unknown languages
or countries.

This commit also makes `-` an accepted separator in addition to `_` since `-`
is the recommended separator (https://tools.ietf.org/html/rfc5646#section-2.1).
`_` is probably still worth supporting since it is the separator used by
`Locale#toString()`.
This commit is contained in:
Adrien Grand 2017-08-28 09:59:25 +02:00 committed by GitHub
parent 36e22bc30f
commit 262ea9534f
4 changed files with 150 additions and 23 deletions

View File

@ -20,7 +20,9 @@
package org.elasticsearch.common.util;
import java.util.Arrays;
import java.util.Locale;
import java.util.MissingResourceException;
/**
* Utilities for for dealing with {@link Locale} objects
@ -28,33 +30,78 @@ import java.util.Locale;
public class LocaleUtils {
/**
* Parse the string describing a locale into a {@link Locale} object
* Parse the given locale as {@code language}, {@code language-country} or
* {@code language-country-variant}.
* Either underscores or hyphens may be used as separators, but consistently, ie.
* you may not use an hyphen to separate the language from the country and an
* underscore to separate the country from the variant.
* @throws IllegalArgumentException if there are too many parts in the locale string
* @throws IllegalArgumentException if the language or country is not recognized
*/
public static Locale parse(String localeStr) {
final String[] parts = localeStr.split("_", -1);
switch (parts.length) {
case 3:
// lang_country_variant
return new Locale(parts[0], parts[1], parts[2]);
case 2:
// lang_country
return new Locale(parts[0], parts[1]);
case 1:
if ("ROOT".equalsIgnoreCase(parts[0])) {
return Locale.ROOT;
}
// lang
return new Locale(parts[0]);
default:
throw new IllegalArgumentException("Can't parse locale: [" + localeStr + "]");
boolean useUnderscoreAsSeparator = false;
for (int i = 0; i < localeStr.length(); ++i) {
final char c = localeStr.charAt(i);
if (c == '-') {
// the locale uses - as a separator, as expected
break;
} else if (c == '_') {
useUnderscoreAsSeparator = true;
break;
}
}
final String[] parts;
if (useUnderscoreAsSeparator) {
parts = localeStr.split("_", -1);
} else {
parts = localeStr.split("-", -1);
}
final Locale locale = parseParts(parts);
try {
locale.getISO3Language();
} catch (MissingResourceException e) {
throw new IllegalArgumentException("Unknown language: " + parts[0], e);
}
try {
locale.getISO3Country();
} catch (MissingResourceException e) {
throw new IllegalArgumentException("Unknown country: " + parts[1], e);
}
return locale;
}
/**
* Return a string for a {@link Locale} object
* Parse the string describing a locale into a {@link Locale} object
* for 5.x indices.
*/
public static String toString(Locale locale) {
// JAVA7 - use .toLanguageTag instead of .toString()
return locale.toString();
@Deprecated
public static Locale parse5x(String localeStr) {
final String[] parts = localeStr.split("_", -1);
return parseParts(parts);
}
private static Locale parseParts(String[] parts) {
switch (parts.length) {
case 3:
// lang, country, variant
return new Locale(parts[0], parts[1], parts[2]);
case 2:
// lang, country
return new Locale(parts[0], parts[1]);
case 1:
if ("ROOT".equalsIgnoreCase(parts[0])) {
return Locale.ROOT;
}
// lang
return new Locale(parts[0]);
default:
throw new IllegalArgumentException("Locales can have at most 3 parts but got " + parts.length + ": " + Arrays.asList(parts));
}
}
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.joda.DateMathParser;
@ -154,7 +155,13 @@ public class DateFieldMapper extends FieldMapper {
builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext));
iterator.remove();
} else if (propName.equals("locale")) {
builder.locale(LocaleUtils.parse(propNode.toString()));
Locale locale;
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
locale = LocaleUtils.parse(propNode.toString());
} else {
locale = LocaleUtils.parse5x(propNode.toString());
}
builder.locale(locale);
iterator.remove();
} else if (propName.equals("format")) {
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));

View File

@ -180,7 +180,13 @@ public class RangeFieldMapper extends FieldMapper {
builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext));
iterator.remove();
} else if (propName.equals("locale")) {
builder.locale(LocaleUtils.parse(propNode.toString()));
Locale locale;
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
locale = LocaleUtils.parse(propNode.toString());
} else {
locale = LocaleUtils.parse5x(propNode.toString());
}
builder.locale(locale);
iterator.remove();
} else if (propName.equals("format")) {
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));

View File

@ -0,0 +1,67 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;
import java.util.Locale;
public class LocaleUtilsTests extends ESTestCase {
public void testIllegalLang() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> LocaleUtils.parse("yz"));
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
e = expectThrows(IllegalArgumentException.class,
() -> LocaleUtils.parse("yz-CA"));
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
}
public void testIllegalCountry() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> LocaleUtils.parse("en-YZ"));
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
e = expectThrows(IllegalArgumentException.class,
() -> LocaleUtils.parse("en-YZ-foobar"));
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
}
public void testIllegalNumberOfParts() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> LocaleUtils.parse("en-US-foo-bar"));
assertThat(e.getMessage(), Matchers.containsString("Locales can have at most 3 parts but got 4"));
}
public void testUnderscores() {
Locale locale1 = LocaleUtils.parse("fr_FR");
Locale locale2 = LocaleUtils.parse("fr-FR");
assertEquals(locale2, locale1);
}
public void testSimple() {
assertEquals(Locale.FRENCH, LocaleUtils.parse("fr"));
assertEquals(Locale.FRANCE, LocaleUtils.parse("fr-FR"));
assertEquals(Locale.ROOT, LocaleUtils.parse("root"));
assertEquals(Locale.ROOT, LocaleUtils.parse(""));
}
}