From 262ea9534f23761c05570996674932a506259890 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 28 Aug 2017 09:59:25 +0200 Subject: [PATCH] Make locale parsing less lenient. (#26361) The `locale` field of `date` fields accepts almost any string and unknown locales are simply ignored, which is trappy. We should fail on unknown languages or countries. This commit also makes `-` an accepted separator in addition to `_` since `-` is the recommended separator (https://tools.ietf.org/html/rfc5646#section-2.1). `_` is probably still worth supporting since it is the separator used by `Locale#toString()`. --- .../common/util/LocaleUtils.java | 89 ++++++++++++++----- .../index/mapper/DateFieldMapper.java | 9 +- .../index/mapper/RangeFieldMapper.java | 8 +- .../common/util/LocaleUtilsTests.java | 67 ++++++++++++++ 4 files changed, 150 insertions(+), 23 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/common/util/LocaleUtilsTests.java diff --git a/core/src/main/java/org/elasticsearch/common/util/LocaleUtils.java b/core/src/main/java/org/elasticsearch/common/util/LocaleUtils.java index 2e6c01a1ca7..b95c11e8138 100644 --- a/core/src/main/java/org/elasticsearch/common/util/LocaleUtils.java +++ b/core/src/main/java/org/elasticsearch/common/util/LocaleUtils.java @@ -20,7 +20,9 @@ package org.elasticsearch.common.util; +import java.util.Arrays; import java.util.Locale; +import java.util.MissingResourceException; /** * Utilities for for dealing with {@link Locale} objects @@ -28,33 +30,78 @@ import java.util.Locale; public class LocaleUtils { /** - * Parse the string describing a locale into a {@link Locale} object + * Parse the given locale as {@code language}, {@code language-country} or + * {@code language-country-variant}. + * Either underscores or hyphens may be used as separators, but consistently, ie. + * you may not use an hyphen to separate the language from the country and an + * underscore to separate the country from the variant. + * @throws IllegalArgumentException if there are too many parts in the locale string + * @throws IllegalArgumentException if the language or country is not recognized */ public static Locale parse(String localeStr) { - final String[] parts = localeStr.split("_", -1); - switch (parts.length) { - case 3: - // lang_country_variant - return new Locale(parts[0], parts[1], parts[2]); - case 2: - // lang_country - return new Locale(parts[0], parts[1]); - case 1: - if ("ROOT".equalsIgnoreCase(parts[0])) { - return Locale.ROOT; - } - // lang - return new Locale(parts[0]); - default: - throw new IllegalArgumentException("Can't parse locale: [" + localeStr + "]"); + boolean useUnderscoreAsSeparator = false; + for (int i = 0; i < localeStr.length(); ++i) { + final char c = localeStr.charAt(i); + if (c == '-') { + // the locale uses - as a separator, as expected + break; + } else if (c == '_') { + useUnderscoreAsSeparator = true; + break; + } } + + final String[] parts; + if (useUnderscoreAsSeparator) { + parts = localeStr.split("_", -1); + } else { + parts = localeStr.split("-", -1); + } + + final Locale locale = parseParts(parts); + + try { + locale.getISO3Language(); + } catch (MissingResourceException e) { + throw new IllegalArgumentException("Unknown language: " + parts[0], e); + } + + try { + locale.getISO3Country(); + } catch (MissingResourceException e) { + throw new IllegalArgumentException("Unknown country: " + parts[1], e); + } + + return locale; } /** - * Return a string for a {@link Locale} object + * Parse the string describing a locale into a {@link Locale} object + * for 5.x indices. */ - public static String toString(Locale locale) { - // JAVA7 - use .toLanguageTag instead of .toString() - return locale.toString(); + @Deprecated + public static Locale parse5x(String localeStr) { + final String[] parts = localeStr.split("_", -1); + return parseParts(parts); } + + private static Locale parseParts(String[] parts) { + switch (parts.length) { + case 3: + // lang, country, variant + return new Locale(parts[0], parts[1], parts[2]); + case 2: + // lang, country + return new Locale(parts[0], parts[1]); + case 1: + if ("ROOT".equalsIgnoreCase(parts[0])) { + return Locale.ROOT; + } + // lang + return new Locale(parts[0]); + default: + throw new IllegalArgumentException("Locales can have at most 3 parts but got " + parts.length + ": " + Arrays.asList(parts)); + } + } + } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 10c2df7eef0..7dddebf91aa 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.joda.DateMathParser; @@ -154,7 +155,13 @@ public class DateFieldMapper extends FieldMapper { builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext)); iterator.remove(); } else if (propName.equals("locale")) { - builder.locale(LocaleUtils.parse(propNode.toString())); + Locale locale; + if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) { + locale = LocaleUtils.parse(propNode.toString()); + } else { + locale = LocaleUtils.parse5x(propNode.toString()); + } + builder.locale(locale); iterator.remove(); } else if (propName.equals("format")) { builder.dateTimeFormatter(parseDateTimeFormatter(propNode)); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java index 1f1cdd71e4b..9479d9370d2 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/RangeFieldMapper.java @@ -180,7 +180,13 @@ public class RangeFieldMapper extends FieldMapper { builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext)); iterator.remove(); } else if (propName.equals("locale")) { - builder.locale(LocaleUtils.parse(propNode.toString())); + Locale locale; + if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) { + locale = LocaleUtils.parse(propNode.toString()); + } else { + locale = LocaleUtils.parse5x(propNode.toString()); + } + builder.locale(locale); iterator.remove(); } else if (propName.equals("format")) { builder.dateTimeFormatter(parseDateTimeFormatter(propNode)); diff --git a/core/src/test/java/org/elasticsearch/common/util/LocaleUtilsTests.java b/core/src/test/java/org/elasticsearch/common/util/LocaleUtilsTests.java new file mode 100644 index 00000000000..9675b225a16 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/common/util/LocaleUtilsTests.java @@ -0,0 +1,67 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; + +import java.util.Locale; + +public class LocaleUtilsTests extends ESTestCase { + + public void testIllegalLang() { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> LocaleUtils.parse("yz")); + assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz")); + + e = expectThrows(IllegalArgumentException.class, + () -> LocaleUtils.parse("yz-CA")); + assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz")); + } + + public void testIllegalCountry() { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> LocaleUtils.parse("en-YZ")); + assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ")); + + e = expectThrows(IllegalArgumentException.class, + () -> LocaleUtils.parse("en-YZ-foobar")); + assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ")); + } + + public void testIllegalNumberOfParts() { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> LocaleUtils.parse("en-US-foo-bar")); + assertThat(e.getMessage(), Matchers.containsString("Locales can have at most 3 parts but got 4")); + } + + public void testUnderscores() { + Locale locale1 = LocaleUtils.parse("fr_FR"); + Locale locale2 = LocaleUtils.parse("fr-FR"); + assertEquals(locale2, locale1); + } + + public void testSimple() { + assertEquals(Locale.FRENCH, LocaleUtils.parse("fr")); + assertEquals(Locale.FRANCE, LocaleUtils.parse("fr-FR")); + assertEquals(Locale.ROOT, LocaleUtils.parse("root")); + assertEquals(Locale.ROOT, LocaleUtils.parse("")); + } +}