LUCENE-6978: Refactor several code places that lookup locales by string name to use BCP47 locale tag instead

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1724891 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2016-01-15 22:41:57 +00:00
parent d3565b1d8f
commit f5b0e537b4
12 changed files with 73 additions and 73 deletions

View File

@ -225,6 +225,12 @@ Other
with better compile-time-checked MethodType; generated class files
are no longer marked as synthetic. (Uwe Schindler)
* LUCENE-6978: Refactor several code places that lookup locales
by string name to use BCP47 locale tag instead. LuceneTestCase
now also prints locales on failing tests this way.
Locale#forLanguageTag() and Locale#toString() were placed on list
of forbidden signatures. (Uwe Schindler, Robert Muir)
======================= Lucene 5.4.1 =======================
Bug Fixes

View File

@ -101,6 +101,6 @@ public class TestNLS extends LuceneTestCase {
MessagesTestBundle.Q0005E_MESSAGE_NOT_IN_BUNDLE, locale);
assertEquals("Message with key:Q0005E_MESSAGE_NOT_IN_BUNDLE and locale: "
+ locale.toString() + " not found.", message);
+ locale.toLanguageTag() + " not found.", message);
}
}

View File

@ -1501,13 +1501,18 @@ public abstract class LuceneTestCase extends Assert {
}
}
private static final String[] availableLanguageTags = Arrays.stream(Locale.getAvailableLocales())
.map(Locale::toLanguageTag)
.sorted()
.distinct()
.toArray(String[]::new);
/**
* Return a random Locale from the available locales on the system.
* @see <a href="http://issues.apache.org/jira/browse/LUCENE-4020">LUCENE-4020</a>
*/
public static Locale randomLocale(Random random) {
Locale locales[] = Locale.getAvailableLocales();
return locales[random.nextInt(locales.length)];
return localeForLanguageTag(availableLanguageTags[random.nextInt(availableLanguageTags.length)]);
}
/**
@ -1520,15 +1525,8 @@ public abstract class LuceneTestCase extends Assert {
}
/** return a Locale object equivalent to its programmatic name */
public static Locale localeForName(String localeName) {
String elements[] = localeName.split("\\_");
switch(elements.length) {
case 4: /* fallthrough for special cases */
case 3: return new Locale(elements[0], elements[1], elements[2]);
case 2: return new Locale(elements[0], elements[1]);
case 1: return new Locale(elements[0]);
default: throw new IllegalArgumentException("Invalid Locale: " + localeName);
}
public static Locale localeForLanguageTag(String languageTag) {
return new Locale.Builder().setLanguageTag(languageTag).build();
}
private static Directory newFSDirectoryImpl(Class<? extends FSDirectory> clazz, Path path, LockFactory lf) throws IOException {

View File

@ -129,7 +129,7 @@ public final class RunListenerPrintReproduceInfo extends RunListener {
if (classEnvRule != null) {
System.err.println("NOTE: test params are: codec=" + classEnvRule.codec +
", sim=" + classEnvRule.similarity +
", locale=" + classEnvRule.locale +
", locale=" + classEnvRule.locale.toLanguageTag() +
", timezone=" + (classEnvRule.timeZone == null ? "(null)" : classEnvRule.timeZone.getID()));
}
System.err.println("NOTE: " + System.getProperty("os.name") + " "
@ -178,7 +178,7 @@ public final class RunListenerPrintReproduceInfo extends RunListener {
// Environment.
if (!TEST_LINE_DOCS_FILE.equals(DEFAULT_LINE_DOCS_FILE)) addVmOpt(b, "tests.linedocsfile", TEST_LINE_DOCS_FILE);
if (classEnvRule != null) {
addVmOpt(b, "tests.locale", classEnvRule.locale);
addVmOpt(b, "tests.locale", classEnvRule.locale.toLanguageTag());
if (classEnvRule.timeZone != null) {
addVmOpt(b, "tests.timezone", classEnvRule.timeZone.getID());
}

View File

@ -53,7 +53,7 @@ import static org.apache.lucene.util.LuceneTestCase.TEST_DOCVALUESFORMAT;
import static org.apache.lucene.util.LuceneTestCase.TEST_POSTINGSFORMAT;
import static org.apache.lucene.util.LuceneTestCase.VERBOSE;
import static org.apache.lucene.util.LuceneTestCase.assumeFalse;
import static org.apache.lucene.util.LuceneTestCase.localeForName;
import static org.apache.lucene.util.LuceneTestCase.localeForLanguageTag;
import static org.apache.lucene.util.LuceneTestCase.random;
import static org.apache.lucene.util.LuceneTestCase.randomLocale;
import static org.apache.lucene.util.LuceneTestCase.randomTimeZone;
@ -200,7 +200,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
// Always pick a random one for consistency (whether tests.locale was specified or not).
savedLocale = Locale.getDefault();
Locale randomLocale = randomLocale(random);
locale = testLocale.equals("random") ? randomLocale : localeForName(testLocale);
locale = testLocale.equals("random") ? randomLocale : localeForLanguageTag(testLocale);
Locale.setDefault(locale);
savedTimeZone = TimeZone.getDefault();

View File

@ -37,3 +37,6 @@ java.lang.Character#codePointAt(char[],int) @ Implicit end offset is error-prone
java.io.File#delete() @ use Files.delete for real exception, IOUtils.deleteFilesIgnoringExceptions if you dont care
java.util.Collections#shuffle(java.util.List) @ Use shuffle(List, Random) instead so that it can be reproduced
java.util.Locale#forLanguageTag(java.lang.String) @ use new Locale.Builder().setLanguageTag(...).build() which has error handling
java.util.Locale#toString() @ use Locale#toLanguageTag() for a standardized BCP47 locale name

View File

@ -532,6 +532,12 @@ Other Changes
* SOLR-8555: SearchGroupShardResponseProcessor (initialCapacity) tweaks (Christine Poerschke)
* LUCENE-6978: Refactor several code places that lookup locales
by string name to use BCP47 locale tag instead. LuceneTestCase
now also prints locales on failing tests this way. In addition,
several places in Solr now additionally support BCP47 in config
files. (Uwe Schindler, Robert Muir)
================== 5.4.1 ==================
Bug Fixes

View File

@ -8,13 +8,14 @@ import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.WeakHashMap;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.dataimport.config.EntityField;
import org.apache.solr.util.DateMathParser;
@ -51,24 +52,10 @@ import org.apache.solr.util.DateMathParser;
public class DateFormatEvaluator extends Evaluator {
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
protected Map<DateFormatCacheKey, SimpleDateFormat> cache = new WeakHashMap<>();
protected Map<String, Locale> availableLocales = new HashMap<>();
protected Set<String> availableTimezones = new HashSet<>();
/**
* Used to wrap cache keys containing a Locale, TimeZone and date format String
*/
static protected class DateFormatCacheKey {
DateFormatCacheKey(Locale l, TimeZone tz, String df) {
this.locale = l;
this.timezone = tz;
this.dateFormat = df;
}
Locale locale;
TimeZone timezone;
String dateFormat;
}
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
public DateFormatEvaluator() {
for (Locale locale : Locale.getAvailableLocales()) {
availableLocales.put(locale.toString(), locale);
@ -77,18 +64,13 @@ public class DateFormatEvaluator extends Evaluator {
availableTimezones.add(tz);
}
}
private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) {
DateFormatCacheKey dfck = new DateFormatCacheKey(locale, timezone, pattern);
SimpleDateFormat sdf = cache.get(dfck);
if(sdf == null) {
sdf = new SimpleDateFormat(pattern, locale);
sdf.setTimeZone(timezone);
cache.put(dfck, sdf);
}
final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale);
sdf.setTimeZone(timezone);
return sdf;
}
@Override
public String evaluate(String expression, Context context) {
List<Object> l = parseParams(expression, context.getVariableResolver());
@ -102,7 +84,7 @@ public class DateFormatEvaluator extends Evaluator {
o = wrapper.resolve();
format = o.toString();
}
Locale locale = Locale.ROOT;
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
if(l.size()>2) {
Object localeObj = l.get(2);
String localeStr = null;
@ -112,8 +94,10 @@ public class DateFormatEvaluator extends Evaluator {
localeStr = localeObj.toString();
}
locale = availableLocales.get(localeStr);
if(locale==null) {
throw new DataImportHandlerException(SEVERE, "Unsupported locale: " + localeStr);
if (locale == null) try {
locale = new Locale.Builder().setLanguageTag(localeStr).build();
} catch (IllformedLocaleException ex) {
throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
}
}
TimeZone tz = TimeZone.getDefault();

View File

@ -47,10 +47,14 @@ public class DateFormatTransformer extends Transformer {
public Object transformRow(Map<String, Object> aRow, Context context) {
for (Map<String, String> map : context.getAllEntityFields()) {
Locale locale = Locale.ROOT;
String customLocale = map.get("locale");
if(customLocale != null){
locale = new Locale(customLocale);
Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
String customLocale = map.get(LOCALE);
if (customLocale != null) {
try {
locale = new Locale.Builder().setLanguageTag(customLocale).build();
} catch (IllformedLocaleException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e);
}
}
String fmt = map.get(DATE_TIME_FMT);
@ -97,4 +101,6 @@ public class DateFormatTransformer extends Transformer {
}
public static final String DATE_TIME_FMT = "dateTimeFormat";
public static final String LOCALE = "locale";
}

View File

@ -20,11 +20,10 @@ import java.text.NumberFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
@ -45,8 +44,6 @@ import java.util.regex.Pattern;
*/
public class NumberFormatTransformer extends Transformer {
private static final Pattern localeRegex = Pattern.compile("^([a-z]{2})-([A-Z]{2})$");
@Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
@ -55,19 +52,17 @@ public class NumberFormatTransformer extends Transformer {
if (style != null) {
String column = fld.get(DataImporter.COLUMN);
String srcCol = fld.get(RegexTransformer.SRC_COL_NAME);
Locale locale = null;
String localeStr = context.replaceTokens(fld.get(LOCALE));
if (srcCol == null)
srcCol = column;
Locale locale = Locale.ROOT;
if (localeStr != null) {
Matcher matcher = localeRegex.matcher(localeStr);
if (matcher.find() && matcher.groupCount() == 2) {
locale = new Locale(matcher.group(1), matcher.group(2));
} else {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified for field: " + fld);
try {
locale = new Locale.Builder().setLanguageTag(localeStr).build();
} catch (IllformedLocaleException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid Locale '" + localeStr + "' specified for field: " + fld, e);
}
} else {
locale = Locale.ROOT;
}
Object val = row.get(srcCol);

View File

@ -21,15 +21,14 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import org.apache.solr.SolrTestCaseJ4;
import java.sql.DriverManager;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
@ThreadLeakAction({ThreadLeakAction.Action.WARN})
@ -38,8 +37,10 @@ import java.util.Properties;
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class TestJdbcDataSourceConvertType extends AbstractDataImportHandlerTestCase {
public void testConvertType() throws Throwable {
assumeTrue("Derby is not happy with locale sr__#Latn", !"sr__#Latn".equals(Locale.getDefault().toString()));
final Locale loc = Locale.getDefault();
assumeFalse("Derby is not happy with locale sr-Latn-*",
Objects.equals(new Locale("sr").getLanguage(), loc.getLanguage()) &&
Objects.equals("Latn", loc.getScript()));
// ironically convertType=false causes BigDecimal to String conversion
convertTypeTest("false", String.class);

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -33,6 +34,7 @@ import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DateUtil;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.handler.extraction.ExtractingParams;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
@ -48,8 +50,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
@ -99,7 +99,7 @@ public final class SolrCellBuilder implements CommandBuilder {
private final IndexSchema schema;
private final List<String> dateFormats;
private final String xpathExpr;
private final List<Parser> parsers = new ArrayList();
private final List<Parser> parsers = new ArrayList<>();
private final SolrContentHandlerFactory solrContentHandlerFactory;
private final Locale locale;
@ -118,7 +118,7 @@ public final class SolrCellBuilder implements CommandBuilder {
LOG.debug("solrLocator: {}", locator);
this.schema = locator.getIndexSchema();
Preconditions.checkNotNull(schema);
LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values()));
LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap<>(schema.getFields()).values()));
ListMultimap<String, String> cellParams = ArrayListMultimap.create();
String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null);
@ -156,7 +156,7 @@ public final class SolrCellBuilder implements CommandBuilder {
String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
Class<? extends SolrContentHandlerFactory> factoryClass;
try {
factoryClass = (Class<? extends SolrContentHandlerFactory>)Class.forName(handlerStr);
factoryClass = Class.forName(handlerStr).asSubclass(SolrContentHandlerFactory.class);
} catch (ClassNotFoundException cnfe) {
throw new MorphlineCompilationException("Could not find class "
+ handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe);
@ -208,7 +208,7 @@ public final class SolrCellBuilder implements CommandBuilder {
}
//LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap);
Map<String, String[]> tmp = new HashMap();
Map<String, String[]> tmp = new HashMap<>();
for (Map.Entry<String,Collection<String>> entry : cellParams.asMap().entrySet()) {
tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()]));
}
@ -327,17 +327,18 @@ public final class SolrCellBuilder implements CommandBuilder {
return record;
}
@SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
private Locale getLocale(String name) {
for (Locale locale : Locale.getAvailableLocales()) {
if (locale.toString().equals(name)) {
return locale;
}
}
assert Locale.ROOT.toString().equals("");
if (name.equals(Locale.ROOT.toString())) {
return Locale.ROOT;
try {
return new Locale.Builder().setLanguageTag(name).build();
} catch (IllformedLocaleException ex) {
throw new MorphlineCompilationException("Malformed / non-existent locale: " + name, getConfig(), ex);
}
throw new MorphlineCompilationException("Unknown locale: " + name, getConfig());
}
}