LUCENE-2797: Upgrade icu to 4.6

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1042185 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-12-04 14:08:03 +00:00
parent 30af48bda8
commit f87ca310ec
23 changed files with 148 additions and 25 deletions

View File

@ -279,7 +279,7 @@ Build
dependency management between contribs by a new ANT macro.
(Uwe Schindler, Shai Erera)
* LUCENE-2399, LUCENE-2683: Upgrade contrib/icu's ICU jar file to ICU 4.4.2
* LUCENE-2797: Upgrade contrib/icu's ICU jar file to ICU 4.6
(Robert Muir)
Optimizations

View File

@ -20,7 +20,7 @@ lucene-analyzers-common-XX.jar
lucene-analyzers-icu-XX.jar
An add-on analysis library that provides improved Unicode support via
International Components for Unicode (ICU). Note: this module depends on
the ICU4j jar file (version >= 4.4.0)
the ICU4j jar file (version >= 4.6.0)
lucene-analyzers-phonetic-XX.jar
An add-on analysis library that provides phonetic encoders via Apache

View File

@ -167,11 +167,16 @@ public abstract class CollationTestBase extends LuceneTestCase {
// Copied (and slightly modified) from
// org.apache.lucene.search.TestSort.testInternationalSort()
//
// TODO: this test is really fragile. there are already 3 different cases,
// depending upon unicode version.
public void testCollationKeySort(Analyzer usAnalyzer,
Analyzer franceAnalyzer,
Analyzer swedenAnalyzer,
Analyzer denmarkAnalyzer,
String usResult) throws Exception {
String usResult,
String frResult,
String svResult,
String dkResult) throws Exception {
RAMDirectory indexStore = new RAMDirectory();
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
@ -220,13 +225,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
assertMatches(searcher, queryY, sort, usResult);
sort.setSort(new SortField("France", SortField.STRING));
assertMatches(searcher, queryX, sort, "EACGI");
assertMatches(searcher, queryX, sort, frResult);
sort.setSort(new SortField("Sweden", SortField.STRING));
assertMatches(searcher, queryY, sort, "BJDFH");
assertMatches(searcher, queryY, sort, svResult);
sort.setSort(new SortField("Denmark", SortField.STRING));
assertMatches(searcher, queryY, sort, "BJDHF");
assertMatches(searcher, queryY, sort, dkResult);
}
// Make sure the documents returned by the search match the expected list

View File

@ -76,6 +76,7 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
// The ICU Collator and Sun java.text.Collator implementations differ in their
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
}
}

View File

@ -94,6 +94,7 @@ public class TestCollationKeyFilter extends CollationTestBase {
// The ICU Collator and Sun java.text.Collator implementations differ in their
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
}
}

View File

@ -1,2 +0,0 @@
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
Apache SVN contains full history.

View File

@ -531,7 +531,7 @@ A7FF>004D
06E7..06E9>
06ED>
0653..0656>
0659..065E>
0659..065F>
0670>
0711>
07FA>
@ -550,6 +550,7 @@ A7FF>004D
A670..A672>
A802>
10A3F>
11046>
1D165..1D166>
1D242..1D244>

View File

@ -403,6 +403,17 @@ ABF9>0039
104A7>0037
104A8>0038
104A9>0039
# Brahmi
11066>0030
11067>0031
11068>0032
11069>0033
1106A>0034
1106B>0035
1106C>0036
1106D>0037
1106E>0038
1106F>0039
# Mathematical Alphanumeric Symbols - Bold digits
1D7CE>0030
1D7CF>0031

View File

@ -26,7 +26,7 @@
#
# file name: nfkc.txt
#
# machine-generated on: 2009-11-30
# machine-generated on: 2010-07-23
#
# Round-trip mappings (=) containing [:Diacritic:] (see DiacriticFolding.txt)
# were changed to one-way decompositions, as their non-starters now decompose.
@ -114,6 +114,7 @@
0657..065B:230
065C:220
065D..065E:230
065F:220
0670:35
06D6..06DC:230
06DF..06E2:230
@ -150,6 +151,7 @@
081B..0823:230
0825..0827:230
0829..082D:230
0859..085B:220
093C:7
094D:9
0951:230
@ -192,7 +194,7 @@
1037:7
1039..103A:9
108D:220
135F:230
135D..135F:230
1714:9
1734:9
17D2:9
@ -212,6 +214,8 @@
1B6C:220
1B6D..1B73:230
1BAA:9
1BE6:7
1BF2..1BF3:9
1C37:7
1CD0..1CD2:230
1CD4:1
@ -231,6 +235,7 @@
1DCF:220
1DD0:202
1DD1..1DE6:230
1DFC:233
1DFD:220
1DFE:230
1DFF:220
@ -248,6 +253,7 @@
20EC..20EF:220
20F0:230
2CEF..2CF1:230
2D7F:9
2DE0..2DFF:230
302A:218
302B:228
@ -281,6 +287,7 @@ FE20..FE26:230
10A39:1
10A3A:220
10A3F:9
11046:9
110B9:9
110BA:7
1D165..1D166:216
@ -1426,6 +1433,14 @@ FE20..FE26:230
2092>006F
2093>0078
2094>0259
2095>0068
2096>006B
2097>006C
2098>006D
2099>006E
209A>0070
209B>0073
209C>0074
20A8>0052 0073
2100>0061 002F 0063
2101>0061 002F 0073
@ -5213,18 +5228,42 @@ FFEE>25CB
1F12C>0052
1F12D>0043 0044
1F12E>0057 005A
1F130>0041
1F131>0042
1F132>0043
1F133>0044
1F134>0045
1F135>0046
1F136>0047
1F137>0048
1F138>0049
1F139>004A
1F13A>004B
1F13B>004C
1F13C>004D
1F13D>004E
1F13E>004F
1F13F>0050
1F140>0051
1F141>0052
1F142>0053
1F143>0054
1F144>0055
1F145>0056
1F146>0057
1F147>0058
1F148>0059
1F149>005A
1F14A>0048 0056
1F14B>004D 0056
1F14C>0053 0044
1F14D>0053 0053
1F14E>0050 0050 0056
1F14F>0057 0043
1F190>0044 004A
1F200>307B 304B
1F201>30B3 30B3
1F202>30B5
1F210>624B
1F211>5B57
1F212>53CC
@ -5259,6 +5298,15 @@ FFEE>25CB
1F22F>6307
1F230>8D70
1F231>6253
1F232>7981
1F233>7A7A
1F234>5408
1F235>6E80
1F236>6709
1F237>6708
1F238>7533
1F239>5272
1F23A>55B6
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
@ -5268,6 +5316,8 @@ FFEE>25CB
1F246>3014 76D7 3015
1F247>3014 52DD 3015
1F248>3014 6557 3015
1F250>5F97
1F251>53EF
2F800>4E3D
2F801>4E38
2F802>4E41

View File

@ -19,11 +19,11 @@
# remains attached.
#
# Extracted from:
# DerivedNormalizationProps-5.2.0.txt
# Date: 2009-08-26, 18:18:50 GMT [MD]
# DerivedNormalizationProps-6.0.0.txt
# Date: 2010-05-20, 15:14:12 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc.
# Copyright (c) 1991-2010 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@ -43,7 +43,7 @@
# WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact.
# For more information, see [http://www.unicode.org/reports/tr44/]
# Omitted code points are unchanged by this mapping.
# @missing: 0000..10FFFF><code point>
# @missing: 0000..10FFFF; NFKC_CF; <code point>
# All code points not explicitly listed for NFKC_Casefold
# have the value <codepoint>.
@ -531,6 +531,7 @@
0520>0521
0522>0523
0524>0525
0526>0527
0531>0561
0532>0562
0533>0563
@ -1097,6 +1098,14 @@
2092>006F
2093>0078
2094>0259
2095>0068
2096>006B
2097>006C
2098>006D
2099>006E
209A>0070
209B>0073
209C>0074
20A8>0072 0073
2100>0061 002F 0063
2101>0061 002F 0073
@ -2312,6 +2321,7 @@ A658>A659
A65A>A65B
A65C>A65D
A65E>A65F
A660>A661
A662>A663
A664>A665
A666>A667
@ -2378,6 +2388,13 @@ A782>A783
A784>A785
A786>A787
A78B>A78C
A78D>0265
A790>A791
A7A0>A7A1
A7A2>A7A3
A7A4>A7A5
A7A6>A7A7
A7A8>A7A9
F900>8C48
F901>66F4
F902>8ECA
@ -4798,18 +4815,42 @@ FFF0..FFF8>
1F12C>0072
1F12D>0063 0064
1F12E>0077 007A
1F130>0061
1F131>0062
1F132>0063
1F133>0064
1F134>0065
1F135>0066
1F136>0067
1F137>0068
1F138>0069
1F139>006A
1F13A>006B
1F13B>006C
1F13C>006D
1F13D>006E
1F13E>006F
1F13F>0070
1F140>0071
1F141>0072
1F142>0073
1F143>0074
1F144>0075
1F145>0076
1F146>0077
1F147>0078
1F148>0079
1F149>007A
1F14A>0068 0076
1F14B>006D 0076
1F14C>0073 0064
1F14D>0073 0073
1F14E>0070 0070 0076
1F14F>0077 0063
1F190>0064 006A
1F200>307B 304B
1F201>30B3 30B3
1F202>30B5
1F210>624B
1F211>5B57
1F212>53CC
@ -4844,6 +4885,15 @@ FFF0..FFF8>
1F22F>6307
1F230>8D70
1F231>6253
1F232>7981
1F233>7A7A
1F234>5408
1F235>6E80
1F236>6709
1F237>6708
1F238>7533
1F239>5272
1F23A>55B6
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
@ -4853,6 +4903,8 @@ FFF0..FFF8>
1F246>3014 76D7 3015
1F247>3014 52DD 3015
1F248>3014 6557 3015
1F250>5F97
1F251>53EF
2F800>4E3D
2F801>4E38
2F802>4E41
@ -5393,4 +5445,4 @@ E0080..E00FF>
E0100..E01EF>
E01F0..E0FFF>
# Total code points: 9740
# Total code points: 9792

View File

@ -358,7 +358,7 @@ and
<h1><a name="backcompat">Backwards Compatibility</a></h1>
<p>
This module exists to provide up-to-date Unicode functionality that supports
the most recent version of Unicode (currently 5.2). However, some users who wish
the most recent version of Unicode (currently 6.0). However, some users who wish
for stronger backwards compatibility can restrict
{@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only
a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}.

View File

@ -73,6 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
// The ICU Collator and java.text.Collator implementations differ in their
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
}
}

View File

@ -91,6 +91,7 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
// The ICU Collator and java.text.Collator implementations differ in their
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
}
}

View File

@ -1,2 +0,0 @@
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
Apache SVN contains full history.