Make locale parsing less lenient. (#26361)
The `locale` field of `date` fields accepts almost any string, and unknown locales are silently ignored, which is error-prone. We should fail on unknown languages or countries. This commit also makes `-` an accepted separator in addition to `_`, since `-` is the recommended separator (https://tools.ietf.org/html/rfc5646#section-2.1). `_` is probably still worth supporting, since it is the separator used by `Locale#toString()`.
This commit is contained in:
parent
36e22bc30f
commit
262ea9534f
|
@ -20,7 +20,9 @@
|
|||
package org.elasticsearch.common.util;
|
||||
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
/**
|
||||
* Utilities for dealing with {@link Locale} objects
|
||||
|
@ -28,16 +30,68 @@ import java.util.Locale;
|
|||
public class LocaleUtils {
|
||||
|
||||
/**
|
||||
* Parse the string describing a locale into a {@link Locale} object
|
||||
* Parse the given locale as {@code language}, {@code language-country} or
|
||||
* {@code language-country-variant}.
|
||||
* Either underscores or hyphens may be used as separators, but consistently, i.e.
|
||||
* you may not use a hyphen to separate the language from the country and an
|
||||
* underscore to separate the country from the variant.
|
||||
* @throws IllegalArgumentException if there are too many parts in the locale string
|
||||
* @throws IllegalArgumentException if the language or country is not recognized
|
||||
*/
|
||||
public static Locale parse(String localeStr) {
|
||||
boolean useUnderscoreAsSeparator = false;
|
||||
for (int i = 0; i < localeStr.length(); ++i) {
|
||||
final char c = localeStr.charAt(i);
|
||||
if (c == '-') {
|
||||
// the locale uses - as a separator, as expected
|
||||
break;
|
||||
} else if (c == '_') {
|
||||
useUnderscoreAsSeparator = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
final String[] parts;
|
||||
if (useUnderscoreAsSeparator) {
|
||||
parts = localeStr.split("_", -1);
|
||||
} else {
|
||||
parts = localeStr.split("-", -1);
|
||||
}
|
||||
|
||||
final Locale locale = parseParts(parts);
|
||||
|
||||
try {
|
||||
locale.getISO3Language();
|
||||
} catch (MissingResourceException e) {
|
||||
throw new IllegalArgumentException("Unknown language: " + parts[0], e);
|
||||
}
|
||||
|
||||
try {
|
||||
locale.getISO3Country();
|
||||
} catch (MissingResourceException e) {
|
||||
throw new IllegalArgumentException("Unknown country: " + parts[1], e);
|
||||
}
|
||||
|
||||
return locale;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the string describing a locale into a {@link Locale} object
|
||||
* for 5.x indices.
|
||||
*/
|
||||
@Deprecated
|
||||
public static Locale parse5x(String localeStr) {
|
||||
final String[] parts = localeStr.split("_", -1);
|
||||
return parseParts(parts);
|
||||
}
|
||||
|
||||
private static Locale parseParts(String[] parts) {
|
||||
switch (parts.length) {
|
||||
case 3:
|
||||
// lang_country_variant
|
||||
// lang, country, variant
|
||||
return new Locale(parts[0], parts[1], parts[2]);
|
||||
case 2:
|
||||
// lang_country
|
||||
// lang, country
|
||||
return new Locale(parts[0], parts[1]);
|
||||
case 1:
|
||||
if ("ROOT".equalsIgnoreCase(parts[0])) {
|
||||
|
@ -46,15 +100,8 @@ public class LocaleUtils {
|
|||
// lang
|
||||
return new Locale(parts[0]);
|
||||
default:
|
||||
throw new IllegalArgumentException("Can't parse locale: [" + localeStr + "]");
|
||||
throw new IllegalArgumentException("Locales can have at most 3 parts but got " + parts.length + ": " + Arrays.asList(parts));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string for a {@link Locale} object
|
||||
*/
|
||||
public static String toString(Locale locale) {
|
||||
// JAVA7 - use .toLanguageTag instead of .toString()
|
||||
return locale.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.BoostQuery;
|
|||
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.joda.DateMathParser;
|
||||
|
@ -154,7 +155,13 @@ public class DateFieldMapper extends FieldMapper {
|
|||
builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext));
|
||||
iterator.remove();
|
||||
} else if (propName.equals("locale")) {
|
||||
builder.locale(LocaleUtils.parse(propNode.toString()));
|
||||
Locale locale;
|
||||
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
|
||||
locale = LocaleUtils.parse(propNode.toString());
|
||||
} else {
|
||||
locale = LocaleUtils.parse5x(propNode.toString());
|
||||
}
|
||||
builder.locale(locale);
|
||||
iterator.remove();
|
||||
} else if (propName.equals("format")) {
|
||||
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
||||
|
|
|
@ -180,7 +180,13 @@ public class RangeFieldMapper extends FieldMapper {
|
|||
builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext));
|
||||
iterator.remove();
|
||||
} else if (propName.equals("locale")) {
|
||||
builder.locale(LocaleUtils.parse(propNode.toString()));
|
||||
Locale locale;
|
||||
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
|
||||
locale = LocaleUtils.parse(propNode.toString());
|
||||
} else {
|
||||
locale = LocaleUtils.parse5x(propNode.toString());
|
||||
}
|
||||
builder.locale(locale);
|
||||
iterator.remove();
|
||||
} else if (propName.equals("format")) {
|
||||
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.common.util;
|
||||
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.hamcrest.Matchers;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
public class LocaleUtilsTests extends ESTestCase {
|
||||
|
||||
public void testIllegalLang() {
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> LocaleUtils.parse("yz"));
|
||||
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class,
|
||||
() -> LocaleUtils.parse("yz-CA"));
|
||||
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
|
||||
}
|
||||
|
||||
public void testIllegalCountry() {
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> LocaleUtils.parse("en-YZ"));
|
||||
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class,
|
||||
() -> LocaleUtils.parse("en-YZ-foobar"));
|
||||
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
|
||||
}
|
||||
|
||||
public void testIllegalNumberOfParts() {
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> LocaleUtils.parse("en-US-foo-bar"));
|
||||
assertThat(e.getMessage(), Matchers.containsString("Locales can have at most 3 parts but got 4"));
|
||||
}
|
||||
|
||||
public void testUnderscores() {
|
||||
Locale locale1 = LocaleUtils.parse("fr_FR");
|
||||
Locale locale2 = LocaleUtils.parse("fr-FR");
|
||||
assertEquals(locale2, locale1);
|
||||
}
|
||||
|
||||
public void testSimple() {
|
||||
assertEquals(Locale.FRENCH, LocaleUtils.parse("fr"));
|
||||
assertEquals(Locale.FRANCE, LocaleUtils.parse("fr-FR"));
|
||||
assertEquals(Locale.ROOT, LocaleUtils.parse("root"));
|
||||
assertEquals(Locale.ROOT, LocaleUtils.parse(""));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue