LANG-828 FastDateParser does not handle non-Gregorian calendars properly

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1389976 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastian Bazley 2012-09-25 16:32:20 +00:00
parent 5ffcb8d35b
commit 34beef1d08
3 changed files with 94 additions and 11 deletions

View File

@ -22,6 +22,7 @@
<body>
<release version="3.2" date="TBA" description="Next release">
<action issue="LANG-828" type="fix">FastDateParser does not handle non-Gregorian calendars properly</action>
<action issue="LANG-826" type="fix">FastDateParser does not handle non-ASCII digits correctly</action>
<action issue="LANG-825" type="add">Create StrBuilder APIs similar to String.format(String, Object...)</action>
<action issue="LANG-817" type="fix">Add org.apache.commons.lang3.SystemUtils.IS_OS_WINDOWS_8</action>

View File

@ -22,6 +22,7 @@ import java.io.Serializable;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
@ -55,6 +56,13 @@ import java.util.regex.Pattern;
* <p>Timing tests indicate this class is as about as fast as SimpleDateFormat
* in single thread applications and about 25% faster in multi-thread applications.</p>
*
* <p>Note that the code only handles Gregorian calendars. The following non-Gregorian
* calendars use SimpleDateFormat internally, and so will be slower:
* <ul>
* <li>ja_JP_TH - Japanese Imperial</li>
* <li>th_TH (any variant) - Thai Buddhist</li>
* </ul>
* </p>
* @since 3.2
*/
public class FastDateParser implements DateParser, Serializable {
@ -114,7 +122,17 @@ public class FastDateParser implements DateParser, Serializable {
if(!patternMatcher.lookingAt()) {
throw new IllegalArgumentException("Invalid pattern");
}
String localeName = locale.toString();
// These locales don't use the Gregorian calendar
// See http://docs.oracle.com/javase/6/docs/technotes/guides/intl/calendar.doc.html
if (localeName.equals("ja_JP_JP") || localeName.startsWith("th_TH")) {
collector.add(new SimpleDateFormatStrategy());
strategies= collector.toArray(new Strategy[collector.size()]);
parsePattern= Pattern.compile("(.*+)");
return;
}
currentFormatField= patternMatcher.group();
Strategy currentStrategy= getStrategy(currentFormatField);
for(;;) {
@ -797,6 +815,38 @@ public class FastDateParser implements DateParser, Serializable {
}
}
/**
* Dummy strategy which delegates to SimpleDateFormat.
*/
private static class SimpleDateFormatStrategy implements Strategy {
@Override
public boolean isNumber() {
return false;
}
@Override
public void setCalendar(FastDateParser parser, Calendar cal, String value) {
String pat = parser.pattern;
Locale loc = parser.locale;
SimpleDateFormat sdf = new SimpleDateFormat(pat, loc);
try {
Date d = sdf.parse(value);
cal.setTime(d);
} catch (ParseException e) {
throw new IllegalArgumentException(
"Unexpected error using pattern " + pat + " with locale " + loc.toString(), e);
}
}
@Override
public boolean addRegex(FastDateParser parser, StringBuilder regex) {
return false;
}
}
private static final Strategy ERA_STRATEGY = new TextStrategy(Calendar.ERA);
private static final Strategy DAY_OF_WEEK_STRATEGY = new TextStrategy(Calendar.DAY_OF_WEEK);
private static final Strategy AM_PM_STRATEGY = new TextStrategy(Calendar.AM_PM);

View File

@ -19,7 +19,6 @@ package org.apache.commons.lang3.time;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -31,6 +30,8 @@ import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import junit.framework.Assert;
import org.apache.commons.lang3.SerializationUtils;
import org.junit.Test;
@ -191,15 +192,37 @@ public class FastDateParserTest {
public void testParses() throws Exception {
for(Locale locale : Locale.getAvailableLocales()) {
for(TimeZone tz : new TimeZone[]{NEW_YORK, GMT}) {
Calendar cal = Calendar.getInstance(tz);
cal.clear();
cal.set(2003, 1, 10);
Date in = cal.getTime();
for(String format : new String[]{LONG_FORMAT, SHORT_FORMAT}) {
SimpleDateFormat sdf = new SimpleDateFormat(LONG_FORMAT, locale);
String fmt = sdf.format(in);
Date out = sdf.parse(fmt);
assertEquals(locale.toString()+" "+ format+ " "+tz.getID(), in, out);
Calendar cal = Calendar.getInstance(tz);
for(int year : new int[]{2003, 1940, 1868, 1867, 0, -1940}) {
// http://docs.oracle.com/javase/6/docs/technotes/guides/intl/calendar.doc.html
if (year < 1868 && locale.toString().equals("ja_JP_JP")) {
continue; // Japanese imperial calendar does not support eras before 1868
}
cal.clear();
if (year < 0) {
cal.set(-year, 1, 10);
cal.set(Calendar.ERA, GregorianCalendar.BC);
} else {
cal.set(year, 1, 10);
}
Date in = cal.getTime();
for(String format : new String[]{LONG_FORMAT, SHORT_FORMAT}) {
SimpleDateFormat sdf = new SimpleDateFormat(format, locale);
if (format.equals(SHORT_FORMAT)) {
if (year < 1930) {
sdf.set2DigitYearStart(cal.getTime());
}
}
String fmt = sdf.format(in);
try {
Date out = sdf.parse(fmt);
assertEquals(locale.toString()+" "+year+" "+ format+ " "+tz.getID(), in, out);
} catch (ParseException pe) {
System.out.println(fmt+" "+locale.toString()+" "+year+" "+ format+ " "+tz.getID());
throw pe;
}
}
}
}
}
@ -253,19 +276,28 @@ public class FastDateParserTest {
if (eraBC) {
cal.set(Calendar.ERA, GregorianCalendar.BC);
}
boolean failed = false;
for(Locale locale : Locale.getAvailableLocales()) {
// ja_JP_JP cannot handle dates before 1868 properly
if (eraBC && format.equals(SHORT_FORMAT) && locale.toString().equals("ja_JP_JP")) {
continue;
}
SimpleDateFormat sdf = new SimpleDateFormat(format, locale);
DateParser fdf = getInstance(format, locale);
try {
checkParse(locale, cal, sdf, fdf);
} catch(ParseException ex) {
failed = true;
// TODO: are these Java bugs?
// ja_JP_JP, th_TH, and th_TH_TH fail with both eras because the generated era name does not match
// ja_JP_JP fails with era BC because it converts to -2002
System.out.println("Locale "+locale+ " failed with "+format+" era "+(eraBC?"BC":"AD")+"\n" + trimMessage(ex.toString()));
}
}
if (failed) {
Assert.fail("One or more tests failed, see above");
}
}
private String trimMessage(String msg) {