mirror of https://github.com/apache/lucene.git
LUCENE-2797: Upgrade icu to 4.6
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1042185 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
30af48bda8
commit
f87ca310ec
|
@ -279,7 +279,7 @@ Build
|
|||
dependency management between contribs by a new ANT macro.
|
||||
(Uwe Schindler, Shai Erera)
|
||||
|
||||
* LUCENE-2399, LUCENE-2683: Upgrade contrib/icu's ICU jar file to ICU 4.4.2
|
||||
* LUCENE-2797: Upgrade contrib/icu's ICU jar file to ICU 4.6
|
||||
(Robert Muir)
|
||||
|
||||
Optimizations
|
||||
|
|
|
@ -20,7 +20,7 @@ lucene-analyzers-common-XX.jar
|
|||
lucene-analyzers-icu-XX.jar
|
||||
An add-on analysis library that provides improved Unicode support via
|
||||
International Components for Unicode (ICU). Note: this module depends on
|
||||
the ICU4j jar file (version >= 4.4.0)
|
||||
the ICU4j jar file (version >= 4.6.0)
|
||||
|
||||
lucene-analyzers-phonetic-XX.jar
|
||||
An add-on analysis library that provides phonetic encoders via Apache
|
||||
|
|
|
@ -167,11 +167,16 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
|||
// Copied (and slightly modified) from
|
||||
// org.apache.lucene.search.TestSort.testInternationalSort()
|
||||
//
|
||||
// TODO: this test is really fragile. there are already 3 different cases,
|
||||
// depending upon unicode version.
|
||||
public void testCollationKeySort(Analyzer usAnalyzer,
|
||||
Analyzer franceAnalyzer,
|
||||
Analyzer swedenAnalyzer,
|
||||
Analyzer denmarkAnalyzer,
|
||||
String usResult) throws Exception {
|
||||
String usResult,
|
||||
String frResult,
|
||||
String svResult,
|
||||
String dkResult) throws Exception {
|
||||
RAMDirectory indexStore = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
|
||||
|
@ -220,13 +225,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
|||
assertMatches(searcher, queryY, sort, usResult);
|
||||
|
||||
sort.setSort(new SortField("France", SortField.STRING));
|
||||
assertMatches(searcher, queryX, sort, "EACGI");
|
||||
assertMatches(searcher, queryX, sort, frResult);
|
||||
|
||||
sort.setSort(new SortField("Sweden", SortField.STRING));
|
||||
assertMatches(searcher, queryY, sort, "BJDFH");
|
||||
assertMatches(searcher, queryY, sort, svResult);
|
||||
|
||||
sort.setSort(new SortField("Denmark", SortField.STRING));
|
||||
assertMatches(searcher, queryY, sort, "BJDHF");
|
||||
assertMatches(searcher, queryY, sort, dkResult);
|
||||
}
|
||||
|
||||
// Make sure the documents returned by the search match the expected list
|
||||
|
|
|
@ -76,6 +76,7 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
|
|||
// The ICU Collator and Sun java.text.Collator implementations differ in their
|
||||
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,6 +94,7 @@ public class TestCollationKeyFilter extends CollationTestBase {
|
|||
// The ICU Collator and Sun java.text.Collator implementations differ in their
|
||||
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -531,7 +531,7 @@ A7FF>004D
|
|||
06E7..06E9>
|
||||
06ED>
|
||||
0653..0656>
|
||||
0659..065E>
|
||||
0659..065F>
|
||||
0670>
|
||||
0711>
|
||||
07FA>
|
||||
|
@ -550,6 +550,7 @@ A7FF>004D
|
|||
A670..A672>
|
||||
A802>
|
||||
10A3F>
|
||||
11046>
|
||||
1D165..1D166>
|
||||
1D242..1D244>
|
||||
|
||||
|
|
|
@ -403,6 +403,17 @@ ABF9>0039
|
|||
104A7>0037
|
||||
104A8>0038
|
||||
104A9>0039
|
||||
# Brahmi
|
||||
11066>0030
|
||||
11067>0031
|
||||
11068>0032
|
||||
11069>0033
|
||||
1106A>0034
|
||||
1106B>0035
|
||||
1106C>0036
|
||||
1106D>0037
|
||||
1106E>0038
|
||||
1106F>0039
|
||||
# Mathematical Alphanumeric Symbols - Bold digits
|
||||
1D7CE>0030
|
||||
1D7CF>0031
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#
|
||||
# file name: nfkc.txt
|
||||
#
|
||||
# machine-generated on: 2009-11-30
|
||||
# machine-generated on: 2010-07-23
|
||||
#
|
||||
# Round-trip mappings (=) containing [:Diacritic:] (see DiacriticFolding.txt)
|
||||
# were changed to one-way decompositions, as their non-starters now decompose.
|
||||
|
@ -114,6 +114,7 @@
|
|||
0657..065B:230
|
||||
065C:220
|
||||
065D..065E:230
|
||||
065F:220
|
||||
0670:35
|
||||
06D6..06DC:230
|
||||
06DF..06E2:230
|
||||
|
@ -150,6 +151,7 @@
|
|||
081B..0823:230
|
||||
0825..0827:230
|
||||
0829..082D:230
|
||||
0859..085B:220
|
||||
093C:7
|
||||
094D:9
|
||||
0951:230
|
||||
|
@ -192,7 +194,7 @@
|
|||
1037:7
|
||||
1039..103A:9
|
||||
108D:220
|
||||
135F:230
|
||||
135D..135F:230
|
||||
1714:9
|
||||
1734:9
|
||||
17D2:9
|
||||
|
@ -212,6 +214,8 @@
|
|||
1B6C:220
|
||||
1B6D..1B73:230
|
||||
1BAA:9
|
||||
1BE6:7
|
||||
1BF2..1BF3:9
|
||||
1C37:7
|
||||
1CD0..1CD2:230
|
||||
1CD4:1
|
||||
|
@ -231,6 +235,7 @@
|
|||
1DCF:220
|
||||
1DD0:202
|
||||
1DD1..1DE6:230
|
||||
1DFC:233
|
||||
1DFD:220
|
||||
1DFE:230
|
||||
1DFF:220
|
||||
|
@ -248,6 +253,7 @@
|
|||
20EC..20EF:220
|
||||
20F0:230
|
||||
2CEF..2CF1:230
|
||||
2D7F:9
|
||||
2DE0..2DFF:230
|
||||
302A:218
|
||||
302B:228
|
||||
|
@ -281,6 +287,7 @@ FE20..FE26:230
|
|||
10A39:1
|
||||
10A3A:220
|
||||
10A3F:9
|
||||
11046:9
|
||||
110B9:9
|
||||
110BA:7
|
||||
1D165..1D166:216
|
||||
|
@ -1426,6 +1433,14 @@ FE20..FE26:230
|
|||
2092>006F
|
||||
2093>0078
|
||||
2094>0259
|
||||
2095>0068
|
||||
2096>006B
|
||||
2097>006C
|
||||
2098>006D
|
||||
2099>006E
|
||||
209A>0070
|
||||
209B>0073
|
||||
209C>0074
|
||||
20A8>0052 0073
|
||||
2100>0061 002F 0063
|
||||
2101>0061 002F 0073
|
||||
|
@ -5213,18 +5228,42 @@ FFEE>25CB
|
|||
1F12C>0052
|
||||
1F12D>0043 0044
|
||||
1F12E>0057 005A
|
||||
1F130>0041
|
||||
1F131>0042
|
||||
1F132>0043
|
||||
1F133>0044
|
||||
1F134>0045
|
||||
1F135>0046
|
||||
1F136>0047
|
||||
1F137>0048
|
||||
1F138>0049
|
||||
1F139>004A
|
||||
1F13A>004B
|
||||
1F13B>004C
|
||||
1F13C>004D
|
||||
1F13D>004E
|
||||
1F13E>004F
|
||||
1F13F>0050
|
||||
1F140>0051
|
||||
1F141>0052
|
||||
1F142>0053
|
||||
1F143>0054
|
||||
1F144>0055
|
||||
1F145>0056
|
||||
1F146>0057
|
||||
1F147>0058
|
||||
1F148>0059
|
||||
1F149>005A
|
||||
1F14A>0048 0056
|
||||
1F14B>004D 0056
|
||||
1F14C>0053 0044
|
||||
1F14D>0053 0053
|
||||
1F14E>0050 0050 0056
|
||||
1F14F>0057 0043
|
||||
1F190>0044 004A
|
||||
1F200>307B 304B
|
||||
1F201>30B3 30B3
|
||||
1F202>30B5
|
||||
1F210>624B
|
||||
1F211>5B57
|
||||
1F212>53CC
|
||||
|
@ -5259,6 +5298,15 @@ FFEE>25CB
|
|||
1F22F>6307
|
||||
1F230>8D70
|
||||
1F231>6253
|
||||
1F232>7981
|
||||
1F233>7A7A
|
||||
1F234>5408
|
||||
1F235>6E80
|
||||
1F236>6709
|
||||
1F237>6708
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
@ -5268,6 +5316,8 @@ FFEE>25CB
|
|||
1F246>3014 76D7 3015
|
||||
1F247>3014 52DD 3015
|
||||
1F248>3014 6557 3015
|
||||
1F250>5F97
|
||||
1F251>53EF
|
||||
2F800>4E3D
|
||||
2F801>4E38
|
||||
2F802>4E41
|
||||
|
|
|
@ -19,11 +19,11 @@
|
|||
# remains attached.
|
||||
#
|
||||
# Extracted from:
|
||||
# DerivedNormalizationProps-5.2.0.txt
|
||||
# Date: 2009-08-26, 18:18:50 GMT [MD]
|
||||
# DerivedNormalizationProps-6.0.0.txt
|
||||
# Date: 2010-05-20, 15:14:12 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# Copyright (c) 1991-2010 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
|||
# WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact.
|
||||
# For more information, see [http://www.unicode.org/reports/tr44/]
|
||||
# Omitted code points are unchanged by this mapping.
|
||||
# @missing: 0000..10FFFF><code point>
|
||||
# @missing: 0000..10FFFF; NFKC_CF; <code point>
|
||||
|
||||
# All code points not explicitly listed for NFKC_Casefold
|
||||
# have the value <codepoint>.
|
||||
|
@ -531,6 +531,7 @@
|
|||
0520>0521
|
||||
0522>0523
|
||||
0524>0525
|
||||
0526>0527
|
||||
0531>0561
|
||||
0532>0562
|
||||
0533>0563
|
||||
|
@ -1097,6 +1098,14 @@
|
|||
2092>006F
|
||||
2093>0078
|
||||
2094>0259
|
||||
2095>0068
|
||||
2096>006B
|
||||
2097>006C
|
||||
2098>006D
|
||||
2099>006E
|
||||
209A>0070
|
||||
209B>0073
|
||||
209C>0074
|
||||
20A8>0072 0073
|
||||
2100>0061 002F 0063
|
||||
2101>0061 002F 0073
|
||||
|
@ -2312,6 +2321,7 @@ A658>A659
|
|||
A65A>A65B
|
||||
A65C>A65D
|
||||
A65E>A65F
|
||||
A660>A661
|
||||
A662>A663
|
||||
A664>A665
|
||||
A666>A667
|
||||
|
@ -2378,6 +2388,13 @@ A782>A783
|
|||
A784>A785
|
||||
A786>A787
|
||||
A78B>A78C
|
||||
A78D>0265
|
||||
A790>A791
|
||||
A7A0>A7A1
|
||||
A7A2>A7A3
|
||||
A7A4>A7A5
|
||||
A7A6>A7A7
|
||||
A7A8>A7A9
|
||||
F900>8C48
|
||||
F901>66F4
|
||||
F902>8ECA
|
||||
|
@ -4798,18 +4815,42 @@ FFF0..FFF8>
|
|||
1F12C>0072
|
||||
1F12D>0063 0064
|
||||
1F12E>0077 007A
|
||||
1F130>0061
|
||||
1F131>0062
|
||||
1F132>0063
|
||||
1F133>0064
|
||||
1F134>0065
|
||||
1F135>0066
|
||||
1F136>0067
|
||||
1F137>0068
|
||||
1F138>0069
|
||||
1F139>006A
|
||||
1F13A>006B
|
||||
1F13B>006C
|
||||
1F13C>006D
|
||||
1F13D>006E
|
||||
1F13E>006F
|
||||
1F13F>0070
|
||||
1F140>0071
|
||||
1F141>0072
|
||||
1F142>0073
|
||||
1F143>0074
|
||||
1F144>0075
|
||||
1F145>0076
|
||||
1F146>0077
|
||||
1F147>0078
|
||||
1F148>0079
|
||||
1F149>007A
|
||||
1F14A>0068 0076
|
||||
1F14B>006D 0076
|
||||
1F14C>0073 0064
|
||||
1F14D>0073 0073
|
||||
1F14E>0070 0070 0076
|
||||
1F14F>0077 0063
|
||||
1F190>0064 006A
|
||||
1F200>307B 304B
|
||||
1F201>30B3 30B3
|
||||
1F202>30B5
|
||||
1F210>624B
|
||||
1F211>5B57
|
||||
1F212>53CC
|
||||
|
@ -4844,6 +4885,15 @@ FFF0..FFF8>
|
|||
1F22F>6307
|
||||
1F230>8D70
|
||||
1F231>6253
|
||||
1F232>7981
|
||||
1F233>7A7A
|
||||
1F234>5408
|
||||
1F235>6E80
|
||||
1F236>6709
|
||||
1F237>6708
|
||||
1F238>7533
|
||||
1F239>5272
|
||||
1F23A>55B6
|
||||
1F240>3014 672C 3015
|
||||
1F241>3014 4E09 3015
|
||||
1F242>3014 4E8C 3015
|
||||
|
@ -4853,6 +4903,8 @@ FFF0..FFF8>
|
|||
1F246>3014 76D7 3015
|
||||
1F247>3014 52DD 3015
|
||||
1F248>3014 6557 3015
|
||||
1F250>5F97
|
||||
1F251>53EF
|
||||
2F800>4E3D
|
||||
2F801>4E38
|
||||
2F802>4E41
|
||||
|
@ -5393,4 +5445,4 @@ E0080..E00FF>
|
|||
E0100..E01EF>
|
||||
E01F0..E0FFF>
|
||||
|
||||
# Total code points: 9740
|
||||
# Total code points: 9792
|
||||
|
|
|
@ -358,7 +358,7 @@ and
|
|||
<h1><a name="backcompat">Backwards Compatibility</a></h1>
|
||||
<p>
|
||||
This module exists to provide up-to-date Unicode functionality that supports
|
||||
the most recent version of Unicode (currently 5.2). However, some users who wish
|
||||
the most recent version of Unicode (currently 6.0). However, some users who wish
|
||||
for stronger backwards compatibility can restrict
|
||||
{@link org.apache.lucene.analysis.icu.ICUNormalizer2Filter} to operate on only
|
||||
a specific Unicode Version by using a {@link com.ibm.icu.text.FilteredNormalizer2}.
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -73,6 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
|
|||
// The ICU Collator and java.text.Collator implementations differ in their
|
||||
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -91,6 +91,7 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
|
|||
// The ICU Collator and java.text.Collator implementations differ in their
|
||||
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
|
||||
"BFJHD", "ECAGI", "BJDFH", "BJDHF");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -1,2 +0,0 @@
|
|||
AnyObjectId[4d9d4e1277822f7a08dd9469ae2ca81d44902552] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[fad20cf105a9effe8677440bdcc6fcf0a82823b2] was removed in git history.
|
||||
Apache SVN contains full history.
|
Loading…
Reference in New Issue