Make locale parsing less lenient. (#26361)
The `locale` field of `date` fields accepts almost any string and unknown locales are simply ignored, which is trappy. We should fail on unknown languages or countries. This commit also makes `-` an accepted separator in addition to `_` since `-` is the recommended separator (https://tools.ietf.org/html/rfc5646#section-2.1). `_` is probably still worth supporting since it is the separator used by `Locale#toString()`.
This commit is contained in:
parent
36e22bc30f
commit
262ea9534f
|
@ -20,7 +20,9 @@
|
||||||
package org.elasticsearch.common.util;
|
package org.elasticsearch.common.util;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.MissingResourceException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utilities for for dealing with {@link Locale} objects
|
* Utilities for for dealing with {@link Locale} objects
|
||||||
|
@ -28,33 +30,78 @@ import java.util.Locale;
|
||||||
public class LocaleUtils {
|
public class LocaleUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse the string describing a locale into a {@link Locale} object
|
* Parse the given locale as {@code language}, {@code language-country} or
|
||||||
|
* {@code language-country-variant}.
|
||||||
|
* Either underscores or hyphens may be used as separators, but consistently, ie.
|
||||||
|
* you may not use an hyphen to separate the language from the country and an
|
||||||
|
* underscore to separate the country from the variant.
|
||||||
|
* @throws IllegalArgumentException if there are too many parts in the locale string
|
||||||
|
* @throws IllegalArgumentException if the language or country is not recognized
|
||||||
*/
|
*/
|
||||||
public static Locale parse(String localeStr) {
|
public static Locale parse(String localeStr) {
|
||||||
final String[] parts = localeStr.split("_", -1);
|
boolean useUnderscoreAsSeparator = false;
|
||||||
switch (parts.length) {
|
for (int i = 0; i < localeStr.length(); ++i) {
|
||||||
case 3:
|
final char c = localeStr.charAt(i);
|
||||||
// lang_country_variant
|
if (c == '-') {
|
||||||
return new Locale(parts[0], parts[1], parts[2]);
|
// the locale uses - as a separator, as expected
|
||||||
case 2:
|
break;
|
||||||
// lang_country
|
} else if (c == '_') {
|
||||||
return new Locale(parts[0], parts[1]);
|
useUnderscoreAsSeparator = true;
|
||||||
case 1:
|
break;
|
||||||
if ("ROOT".equalsIgnoreCase(parts[0])) {
|
}
|
||||||
return Locale.ROOT;
|
|
||||||
}
|
|
||||||
// lang
|
|
||||||
return new Locale(parts[0]);
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("Can't parse locale: [" + localeStr + "]");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final String[] parts;
|
||||||
|
if (useUnderscoreAsSeparator) {
|
||||||
|
parts = localeStr.split("_", -1);
|
||||||
|
} else {
|
||||||
|
parts = localeStr.split("-", -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Locale locale = parseParts(parts);
|
||||||
|
|
||||||
|
try {
|
||||||
|
locale.getISO3Language();
|
||||||
|
} catch (MissingResourceException e) {
|
||||||
|
throw new IllegalArgumentException("Unknown language: " + parts[0], e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
locale.getISO3Country();
|
||||||
|
} catch (MissingResourceException e) {
|
||||||
|
throw new IllegalArgumentException("Unknown country: " + parts[1], e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a string for a {@link Locale} object
|
* Parse the string describing a locale into a {@link Locale} object
|
||||||
|
* for 5.x indices.
|
||||||
*/
|
*/
|
||||||
public static String toString(Locale locale) {
|
@Deprecated
|
||||||
// JAVA7 - use .toLanguageTag instead of .toString()
|
public static Locale parse5x(String localeStr) {
|
||||||
return locale.toString();
|
final String[] parts = localeStr.split("_", -1);
|
||||||
|
return parseParts(parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Locale parseParts(String[] parts) {
|
||||||
|
switch (parts.length) {
|
||||||
|
case 3:
|
||||||
|
// lang, country, variant
|
||||||
|
return new Locale(parts[0], parts[1], parts[2]);
|
||||||
|
case 2:
|
||||||
|
// lang, country
|
||||||
|
return new Locale(parts[0], parts[1]);
|
||||||
|
case 1:
|
||||||
|
if ("ROOT".equalsIgnoreCase(parts[0])) {
|
||||||
|
return Locale.ROOT;
|
||||||
|
}
|
||||||
|
// lang
|
||||||
|
return new Locale(parts[0]);
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("Locales can have at most 3 parts but got " + parts.length + ": " + Arrays.asList(parts));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.Explicit;
|
import org.elasticsearch.common.Explicit;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.joda.DateMathParser;
|
import org.elasticsearch.common.joda.DateMathParser;
|
||||||
|
@ -154,7 +155,13 @@ public class DateFieldMapper extends FieldMapper {
|
||||||
builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext));
|
builder.ignoreMalformed(TypeParsers.nodeBooleanValue(name, "ignore_malformed", propNode, parserContext));
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("locale")) {
|
} else if (propName.equals("locale")) {
|
||||||
builder.locale(LocaleUtils.parse(propNode.toString()));
|
Locale locale;
|
||||||
|
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
|
||||||
|
locale = LocaleUtils.parse(propNode.toString());
|
||||||
|
} else {
|
||||||
|
locale = LocaleUtils.parse5x(propNode.toString());
|
||||||
|
}
|
||||||
|
builder.locale(locale);
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("format")) {
|
} else if (propName.equals("format")) {
|
||||||
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
||||||
|
|
|
@ -180,7 +180,13 @@ public class RangeFieldMapper extends FieldMapper {
|
||||||
builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext));
|
builder.coerce(TypeParsers.nodeBooleanValue(name, "coerce", propNode, parserContext));
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("locale")) {
|
} else if (propName.equals("locale")) {
|
||||||
builder.locale(LocaleUtils.parse(propNode.toString()));
|
Locale locale;
|
||||||
|
if (parserContext.indexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)) {
|
||||||
|
locale = LocaleUtils.parse(propNode.toString());
|
||||||
|
} else {
|
||||||
|
locale = LocaleUtils.parse5x(propNode.toString());
|
||||||
|
}
|
||||||
|
builder.locale(locale);
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("format")) {
|
} else if (propName.equals("format")) {
|
||||||
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
builder.dateTimeFormatter(parseDateTimeFormatter(propNode));
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.common.util;
|
||||||
|
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
import org.hamcrest.Matchers;
|
||||||
|
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
public class LocaleUtilsTests extends ESTestCase {
|
||||||
|
|
||||||
|
public void testIllegalLang() {
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> LocaleUtils.parse("yz"));
|
||||||
|
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
|
||||||
|
|
||||||
|
e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> LocaleUtils.parse("yz-CA"));
|
||||||
|
assertThat(e.getMessage(), Matchers.containsString("Unknown language: yz"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIllegalCountry() {
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> LocaleUtils.parse("en-YZ"));
|
||||||
|
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
|
||||||
|
|
||||||
|
e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> LocaleUtils.parse("en-YZ-foobar"));
|
||||||
|
assertThat(e.getMessage(), Matchers.containsString("Unknown country: YZ"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIllegalNumberOfParts() {
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> LocaleUtils.parse("en-US-foo-bar"));
|
||||||
|
assertThat(e.getMessage(), Matchers.containsString("Locales can have at most 3 parts but got 4"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnderscores() {
|
||||||
|
Locale locale1 = LocaleUtils.parse("fr_FR");
|
||||||
|
Locale locale2 = LocaleUtils.parse("fr-FR");
|
||||||
|
assertEquals(locale2, locale1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSimple() {
|
||||||
|
assertEquals(Locale.FRENCH, LocaleUtils.parse("fr"));
|
||||||
|
assertEquals(Locale.FRANCE, LocaleUtils.parse("fr-FR"));
|
||||||
|
assertEquals(Locale.ROOT, LocaleUtils.parse("root"));
|
||||||
|
assertEquals(Locale.ROOT, LocaleUtils.parse(""));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue