mirror of https://github.com/apache/lucene.git
LUCENE-9773: upgrade icu to 68.2 (#2372)
Upgrade from icu 62.2 to 68.2, with Unicode 13 support. Modify GenerateUTR30DataFiles to take the release tag as a program argument. Gradle populates this automatically, removing a manual step from the regeneration process.
This commit is contained in:
parent
ef920388e6
commit
dd91f5ca82
|
@ -51,12 +51,16 @@ configure(project(":lucene:analysis:icu")) {
|
||||||
|
|
||||||
doFirst {
|
doFirst {
|
||||||
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
|
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
|
||||||
|
def v = getVersion('com.ibm.icu', 'icu4j');
|
||||||
project.javaexec {
|
project.javaexec {
|
||||||
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
|
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
|
||||||
classpath = sourceSets.tools.runtimeClasspath
|
classpath = sourceSets.tools.runtimeClasspath
|
||||||
|
|
||||||
ignoreExitValue false
|
ignoreExitValue false
|
||||||
workingDir utr30DataDir
|
workingDir utr30DataDir
|
||||||
|
args = [
|
||||||
|
"release-${v.replace(".", "-")}"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
project.exec {
|
project.exec {
|
||||||
|
|
|
@ -256,6 +256,8 @@ Other
|
||||||
* LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and small refactor some codecs
|
* LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and small refactor some codecs
|
||||||
to separate reading header/footer from reading content of the file. (Ignacio Vera)
|
to separate reading header/footer from reading content of the file. (Ignacio Vera)
|
||||||
|
|
||||||
|
* LUCENE-9773: Upgrade icu to 68.2 (Robert Muir)
|
||||||
|
|
||||||
======================= Lucene 8.9.0 =======================
|
======================= Lucene 8.9.0 =======================
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
|
@ -56,6 +56,7 @@
|
||||||
FE58>002D
|
FE58>002D
|
||||||
FE63>002D
|
FE63>002D
|
||||||
FF0D>002D
|
FF0D>002D
|
||||||
|
10EAD>002D
|
||||||
|
|
||||||
## Greek letterforms folding (done by kd)
|
## Greek letterforms folding (done by kd)
|
||||||
|
|
||||||
|
|
|
@ -76,6 +76,7 @@
|
||||||
0AFD..0AFF>
|
0AFD..0AFF>
|
||||||
0B3C>
|
0B3C>
|
||||||
0B4D>
|
0B4D>
|
||||||
|
0B55>
|
||||||
0BCD>
|
0BCD>
|
||||||
0C4D>
|
0C4D>
|
||||||
0CBC>
|
0CBC>
|
||||||
|
@ -85,6 +86,7 @@
|
||||||
0DCA>
|
0DCA>
|
||||||
0E47..0E4C>
|
0E47..0E4C>
|
||||||
0E4E>
|
0E4E>
|
||||||
|
0EBA>
|
||||||
0EC8..0ECC>
|
0EC8..0ECC>
|
||||||
0F18..0F19>
|
0F18..0F19>
|
||||||
0F35>
|
0F35>
|
||||||
|
@ -96,9 +98,12 @@
|
||||||
0FC6>
|
0FC6>
|
||||||
1037>
|
1037>
|
||||||
1039..103A>
|
1039..103A>
|
||||||
|
1063..1064>
|
||||||
|
1069..106D>
|
||||||
1087..108D>
|
1087..108D>
|
||||||
108F>
|
108F>
|
||||||
109A..109B>
|
109A..109B>
|
||||||
|
135D..135F>
|
||||||
17C9..17D3>
|
17C9..17D3>
|
||||||
17DD>
|
17DD>
|
||||||
1939..193B>
|
1939..193B>
|
||||||
|
@ -135,8 +140,8 @@ A67C..A67D>
|
||||||
A67F>
|
A67F>
|
||||||
A69C..A69D>
|
A69C..A69D>
|
||||||
A6F0..A6F1>
|
A6F0..A6F1>
|
||||||
A717..A721>
|
A700..A721>
|
||||||
A788>
|
A788..A78A>
|
||||||
A7F8..A7F9>
|
A7F8..A7F9>
|
||||||
A8C4>
|
A8C4>
|
||||||
A8E0..A8F1>
|
A8E0..A8F1>
|
||||||
|
@ -149,6 +154,7 @@ AA7B..AA7D>
|
||||||
AABF..AAC2>
|
AABF..AAC2>
|
||||||
AAF6>
|
AAF6>
|
||||||
AB5B..AB5F>
|
AB5B..AB5F>
|
||||||
|
AB69..AB6B>
|
||||||
ABEC..ABED>
|
ABEC..ABED>
|
||||||
FB1E>
|
FB1E>
|
||||||
FE20..FE2F>
|
FE20..FE2F>
|
||||||
|
@ -180,6 +186,9 @@ FFE3>
|
||||||
116B6..116B7>
|
116B6..116B7>
|
||||||
1172B>
|
1172B>
|
||||||
11839..1183A>
|
11839..1183A>
|
||||||
|
1193D..1193E>
|
||||||
|
11943>
|
||||||
|
119E0>
|
||||||
11A34>
|
11A34>
|
||||||
11A47>
|
11A47>
|
||||||
11A99>
|
11A99>
|
||||||
|
@ -188,12 +197,16 @@ FFE3>
|
||||||
11D44..11D45>
|
11D44..11D45>
|
||||||
11D97>
|
11D97>
|
||||||
16AF0..16AF4>
|
16AF0..16AF4>
|
||||||
|
16B30..16B36>
|
||||||
16F8F..16F9F>
|
16F8F..16F9F>
|
||||||
|
16FF0..16FF1>
|
||||||
1D167..1D169>
|
1D167..1D169>
|
||||||
1D16D..1D172>
|
1D16D..1D172>
|
||||||
1D17B..1D182>
|
1D17B..1D182>
|
||||||
1D185..1D18B>
|
1D185..1D18B>
|
||||||
1D1AA..1D1AD>
|
1D1AA..1D1AD>
|
||||||
|
1E130..1E136>
|
||||||
|
1E2EC..1E2EF>
|
||||||
1E8D0..1E8D6>
|
1E8D0..1E8D6>
|
||||||
1E944..1E946>
|
1E944..1E946>
|
||||||
1E948..1E94A>
|
1E948..1E94A>
|
||||||
|
|
|
@ -580,6 +580,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||||
118E7>0037 # WARANG CITI DIGIT SEVEN
|
118E7>0037 # WARANG CITI DIGIT SEVEN
|
||||||
118E8>0038 # WARANG CITI DIGIT EIGHT
|
118E8>0038 # WARANG CITI DIGIT EIGHT
|
||||||
118E9>0039 # WARANG CITI DIGIT NINE
|
118E9>0039 # WARANG CITI DIGIT NINE
|
||||||
|
11950>0030 # DIVES AKURU DIGIT ZERO
|
||||||
|
11951>0031 # DIVES AKURU DIGIT ONE
|
||||||
|
11952>0032 # DIVES AKURU DIGIT TWO
|
||||||
|
11953>0033 # DIVES AKURU DIGIT THREE
|
||||||
|
11954>0034 # DIVES AKURU DIGIT FOUR
|
||||||
|
11955>0035 # DIVES AKURU DIGIT FIVE
|
||||||
|
11956>0036 # DIVES AKURU DIGIT SIX
|
||||||
|
11957>0037 # DIVES AKURU DIGIT SEVEN
|
||||||
|
11958>0038 # DIVES AKURU DIGIT EIGHT
|
||||||
|
11959>0039 # DIVES AKURU DIGIT NINE
|
||||||
11C50>0030 # BHAIKSUKI DIGIT ZERO
|
11C50>0030 # BHAIKSUKI DIGIT ZERO
|
||||||
11C51>0031 # BHAIKSUKI DIGIT ONE
|
11C51>0031 # BHAIKSUKI DIGIT ONE
|
||||||
11C52>0032 # BHAIKSUKI DIGIT TWO
|
11C52>0032 # BHAIKSUKI DIGIT TWO
|
||||||
|
@ -630,6 +640,26 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||||
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
|
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
|
||||||
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
|
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
|
||||||
16B59>0039 # PAHAWH HMONG DIGIT NINE
|
16B59>0039 # PAHAWH HMONG DIGIT NINE
|
||||||
|
1E140>0030 # NYIAKENG PUACHUE HMONG DIGIT ZERO
|
||||||
|
1E141>0031 # NYIAKENG PUACHUE HMONG DIGIT ONE
|
||||||
|
1E142>0032 # NYIAKENG PUACHUE HMONG DIGIT TWO
|
||||||
|
1E143>0033 # NYIAKENG PUACHUE HMONG DIGIT THREE
|
||||||
|
1E144>0034 # NYIAKENG PUACHUE HMONG DIGIT FOUR
|
||||||
|
1E145>0035 # NYIAKENG PUACHUE HMONG DIGIT FIVE
|
||||||
|
1E146>0036 # NYIAKENG PUACHUE HMONG DIGIT SIX
|
||||||
|
1E147>0037 # NYIAKENG PUACHUE HMONG DIGIT SEVEN
|
||||||
|
1E148>0038 # NYIAKENG PUACHUE HMONG DIGIT EIGHT
|
||||||
|
1E149>0039 # NYIAKENG PUACHUE HMONG DIGIT NINE
|
||||||
|
1E2F0>0030 # WANCHO DIGIT ZERO
|
||||||
|
1E2F1>0031 # WANCHO DIGIT ONE
|
||||||
|
1E2F2>0032 # WANCHO DIGIT TWO
|
||||||
|
1E2F3>0033 # WANCHO DIGIT THREE
|
||||||
|
1E2F4>0034 # WANCHO DIGIT FOUR
|
||||||
|
1E2F5>0035 # WANCHO DIGIT FIVE
|
||||||
|
1E2F6>0036 # WANCHO DIGIT SIX
|
||||||
|
1E2F7>0037 # WANCHO DIGIT SEVEN
|
||||||
|
1E2F8>0038 # WANCHO DIGIT EIGHT
|
||||||
|
1E2F9>0039 # WANCHO DIGIT NINE
|
||||||
1E950>0030 # ADLAM DIGIT ZERO
|
1E950>0030 # ADLAM DIGIT ZERO
|
||||||
1E951>0031 # ADLAM DIGIT ONE
|
1E951>0031 # ADLAM DIGIT ONE
|
||||||
1E952>0032 # ADLAM DIGIT TWO
|
1E952>0032 # ADLAM DIGIT TWO
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
#
|
#
|
||||||
# Complete data for Unicode NFC normalization.
|
# Complete data for Unicode NFC normalization.
|
||||||
|
|
||||||
* Unicode 11.0.0
|
* Unicode 13.0.0
|
||||||
|
|
||||||
# Canonical_Combining_Class (ccc) values
|
# Canonical_Combining_Class (ccc) values
|
||||||
0300..0314:230
|
0300..0314:230
|
||||||
|
@ -176,6 +176,7 @@
|
||||||
0E3A:9
|
0E3A:9
|
||||||
0E48..0E4B:107
|
0E48..0E4B:107
|
||||||
0EB8..0EB9:118
|
0EB8..0EB9:118
|
||||||
|
0EBA:9
|
||||||
0EC8..0ECB:122
|
0EC8..0ECB:122
|
||||||
0F18..0F19:220
|
0F18..0F19:220
|
||||||
0F35:220
|
0F35:220
|
||||||
|
@ -211,6 +212,7 @@
|
||||||
1AB5..1ABA:220
|
1AB5..1ABA:220
|
||||||
1ABB..1ABC:230
|
1ABB..1ABC:230
|
||||||
1ABD:220
|
1ABD:220
|
||||||
|
1ABF..1AC0:220
|
||||||
1B34:7
|
1B34:7
|
||||||
1B44:9
|
1B44:9
|
||||||
1B6B:230
|
1B6B:230
|
||||||
|
@ -275,6 +277,7 @@ A674..A67D:230
|
||||||
A69E..A69F:230
|
A69E..A69F:230
|
||||||
A6F0..A6F1:230
|
A6F0..A6F1:230
|
||||||
A806:9
|
A806:9
|
||||||
|
A82C:9
|
||||||
A8C4:9
|
A8C4:9
|
||||||
A8E0..A8F1:230
|
A8E0..A8F1:230
|
||||||
A92B..A92D:220
|
A92B..A92D:220
|
||||||
|
@ -305,6 +308,7 @@ FE2E..FE2F:230
|
||||||
10AE5:230
|
10AE5:230
|
||||||
10AE6:220
|
10AE6:220
|
||||||
10D24..10D27:230
|
10D24..10D27:230
|
||||||
|
10EAB..10EAC:230
|
||||||
10F46..10F47:220
|
10F46..10F47:220
|
||||||
10F48..10F4A:230
|
10F48..10F4A:230
|
||||||
10F4B:220
|
10F4B:220
|
||||||
|
@ -340,6 +344,9 @@ FE2E..FE2F:230
|
||||||
1172B:9
|
1172B:9
|
||||||
11839:9
|
11839:9
|
||||||
1183A:7
|
1183A:7
|
||||||
|
1193D..1193E:9
|
||||||
|
11943:7
|
||||||
|
119E0:9
|
||||||
11A34:9
|
11A34:9
|
||||||
11A47:9
|
11A47:9
|
||||||
11A99:9
|
11A99:9
|
||||||
|
@ -349,6 +356,7 @@ FE2E..FE2F:230
|
||||||
11D97:9
|
11D97:9
|
||||||
16AF0..16AF4:1
|
16AF0..16AF4:1
|
||||||
16B30..16B36:230
|
16B30..16B36:230
|
||||||
|
16FF0..16FF1:6
|
||||||
1BC9E:1
|
1BC9E:1
|
||||||
1D165..1D166:216
|
1D165..1D166:216
|
||||||
1D167..1D169:1
|
1D167..1D169:1
|
||||||
|
@ -364,6 +372,8 @@ FE2E..FE2F:230
|
||||||
1E01B..1E021:230
|
1E01B..1E021:230
|
||||||
1E023..1E024:230
|
1E023..1E024:230
|
||||||
1E026..1E02A:230
|
1E026..1E02A:230
|
||||||
|
1E130..1E136:230
|
||||||
|
1E2EC..1E2EF:230
|
||||||
1E8D0..1E8D6:220
|
1E8D0..1E8D6:220
|
||||||
1E944..1E949:230
|
1E944..1E949:230
|
||||||
1E94A:7
|
1E94A:7
|
||||||
|
@ -1874,6 +1884,7 @@ FB4E>05E4 05BF
|
||||||
114BE=114B9 114BD
|
114BE=114B9 114BD
|
||||||
115BA=115B8 115AF
|
115BA=115B8 115AF
|
||||||
115BB=115B9 115AF
|
115BB=115B9 115AF
|
||||||
|
11938=11935 11930
|
||||||
1D15E>1D157 1D165
|
1D15E>1D157 1D165
|
||||||
1D15F>1D158 1D165
|
1D15F>1D158 1D165
|
||||||
1D160>1D15F 1D16E
|
1D160>1D15F 1D16E
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
# to NFKC one-way mappings.
|
# to NFKC one-way mappings.
|
||||||
# Use this file as the second gennorm2 input file after nfc.txt.
|
# Use this file as the second gennorm2 input file after nfc.txt.
|
||||||
|
|
||||||
* Unicode 11.0.0
|
* Unicode 13.0.0
|
||||||
|
|
||||||
00A0>0020
|
00A0>0020
|
||||||
00A8>0020 0308
|
00A8>0020 0308
|
||||||
|
@ -1107,6 +1107,7 @@
|
||||||
32FC>30F0
|
32FC>30F0
|
||||||
32FD>30F1
|
32FD>30F1
|
||||||
32FE>30F2
|
32FE>30F2
|
||||||
|
32FF>4EE4 548C
|
||||||
3300>30A2 30D1 30FC 30C8
|
3300>30A2 30D1 30FC 30C8
|
||||||
3301>30A2 30EB 30D5 30A1
|
3301>30A2 30EB 30D5 30A1
|
||||||
3302>30A2 30F3 30DA 30A2
|
3302>30A2 30F3 30DA 30A2
|
||||||
|
@ -1372,6 +1373,7 @@ AB5C>A727
|
||||||
AB5D>AB37
|
AB5D>AB37
|
||||||
AB5E>026B
|
AB5E>026B
|
||||||
AB5F>AB52
|
AB5F>AB52
|
||||||
|
AB69>028D
|
||||||
FB00>0066 0066
|
FB00>0066 0066
|
||||||
FB01>0066 0069
|
FB01>0066 0069
|
||||||
FB02>0066 006C
|
FB02>0066 006C
|
||||||
|
@ -3630,6 +3632,7 @@ FFEE>25CB
|
||||||
1F14F>0057 0043
|
1F14F>0057 0043
|
||||||
1F16A>004D 0043
|
1F16A>004D 0043
|
||||||
1F16B>004D 0044
|
1F16B>004D 0044
|
||||||
|
1F16C>004D 0052
|
||||||
1F190>0044 004A
|
1F190>0044 004A
|
||||||
1F200>307B 304B
|
1F200>307B 304B
|
||||||
1F201>30B3 30B3
|
1F201>30B3 30B3
|
||||||
|
@ -3689,3 +3692,13 @@ FFEE>25CB
|
||||||
1F248>3014 6557 3015
|
1F248>3014 6557 3015
|
||||||
1F250>5F97
|
1F250>5F97
|
||||||
1F251>53EF
|
1F251>53EF
|
||||||
|
1FBF0>0030
|
||||||
|
1FBF1>0031
|
||||||
|
1FBF2>0032
|
||||||
|
1FBF3>0033
|
||||||
|
1FBF4>0034
|
||||||
|
1FBF5>0035
|
||||||
|
1FBF6>0036
|
||||||
|
1FBF7>0037
|
||||||
|
1FBF8>0038
|
||||||
|
1FBF9>0039
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
|
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
|
||||||
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
|
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
|
||||||
|
|
||||||
* Unicode 11.0.0
|
* Unicode 13.0.0
|
||||||
|
|
||||||
0041>0061
|
0041>0061
|
||||||
0042>0062
|
0042>0062
|
||||||
|
@ -2082,6 +2082,7 @@
|
||||||
32FC>30F0
|
32FC>30F0
|
||||||
32FD>30F1
|
32FD>30F1
|
||||||
32FE>30F2
|
32FE>30F2
|
||||||
|
32FF>4EE4 548C
|
||||||
3300>30A2 30D1 30FC 30C8
|
3300>30A2 30D1 30FC 30C8
|
||||||
3301>30A2 30EB 30D5 30A1
|
3301>30A2 30EB 30D5 30A1
|
||||||
3302>30A2 30F3 30DA 30A2
|
3302>30A2 30F3 30DA 30A2
|
||||||
|
@ -2450,12 +2451,23 @@ A7B3>AB53
|
||||||
A7B4>A7B5
|
A7B4>A7B5
|
||||||
A7B6>A7B7
|
A7B6>A7B7
|
||||||
A7B8>A7B9
|
A7B8>A7B9
|
||||||
|
A7BA>A7BB
|
||||||
|
A7BC>A7BD
|
||||||
|
A7BE>A7BF
|
||||||
|
A7C2>A7C3
|
||||||
|
A7C4>A794
|
||||||
|
A7C5>0282
|
||||||
|
A7C6>1D8E
|
||||||
|
A7C7>A7C8
|
||||||
|
A7C9>A7CA
|
||||||
|
A7F5>A7F6
|
||||||
A7F8>0127
|
A7F8>0127
|
||||||
A7F9>0153
|
A7F9>0153
|
||||||
AB5C>A727
|
AB5C>A727
|
||||||
AB5D>AB37
|
AB5D>AB37
|
||||||
AB5E>026B
|
AB5E>026B
|
||||||
AB5F>AB52
|
AB5F>AB52
|
||||||
|
AB69>028D
|
||||||
AB70>13A0
|
AB70>13A0
|
||||||
AB71>13A1
|
AB71>13A1
|
||||||
AB72>13A2
|
AB72>13A2
|
||||||
|
@ -5319,6 +5331,7 @@ FFF0..FFF8>
|
||||||
1F14F>0077 0063
|
1F14F>0077 0063
|
||||||
1F16A>006D 0063
|
1F16A>006D 0063
|
||||||
1F16B>006D 0064
|
1F16B>006D 0064
|
||||||
|
1F16C>006D 0072
|
||||||
1F190>0064 006A
|
1F190>0064 006A
|
||||||
1F200>307B 304B
|
1F200>307B 304B
|
||||||
1F201>30B3 30B3
|
1F201>30B3 30B3
|
||||||
|
@ -5378,6 +5391,16 @@ FFF0..FFF8>
|
||||||
1F248>3014 6557 3015
|
1F248>3014 6557 3015
|
||||||
1F250>5F97
|
1F250>5F97
|
||||||
1F251>53EF
|
1F251>53EF
|
||||||
|
1FBF0>0030
|
||||||
|
1FBF1>0031
|
||||||
|
1FBF2>0032
|
||||||
|
1FBF3>0033
|
||||||
|
1FBF4>0034
|
||||||
|
1FBF5>0035
|
||||||
|
1FBF6>0036
|
||||||
|
1FBF7>0037
|
||||||
|
1FBF8>0038
|
||||||
|
1FBF9>0039
|
||||||
2F800>4E3D
|
2F800>4E3D
|
||||||
2F801>4E38
|
2F801>4E38
|
||||||
2F802>4E41
|
2F802>4E41
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -42,7 +42,7 @@ import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Downloads/generates lucene/analysis/icu/src/data/utr30/*.txt
|
* Downloads/generates lucene/analysis/icu/src/data/utr30/*.txt for the specified icu release tag.
|
||||||
*
|
*
|
||||||
* <p>ASSUMPTION: This class will be run with current directory set to
|
* <p>ASSUMPTION: This class will be run with current directory set to
|
||||||
* lucene/analysis/icu/src/data/utr30/
|
* lucene/analysis/icu/src/data/utr30/
|
||||||
|
@ -56,7 +56,6 @@ import java.util.stream.Collectors;
|
||||||
*/
|
*/
|
||||||
public class GenerateUTR30DataFiles {
|
public class GenerateUTR30DataFiles {
|
||||||
private static final String ICU_GIT_TAG_URL = "https://raw.githubusercontent.com/unicode-org/icu";
|
private static final String ICU_GIT_TAG_URL = "https://raw.githubusercontent.com/unicode-org/icu";
|
||||||
private static final String ICU_RELEASE_TAG = "maint/maint-62";
|
|
||||||
private static final String ICU_DATA_NORM2_PATH = "icu4c/source/data/unidata/norm2";
|
private static final String ICU_DATA_NORM2_PATH = "icu4c/source/data/unidata/norm2";
|
||||||
private static final String NFC_TXT = "nfc.txt";
|
private static final String NFC_TXT = "nfc.txt";
|
||||||
private static final String NFKC_TXT = "nfkc.txt";
|
private static final String NFKC_TXT = "nfkc.txt";
|
||||||
|
@ -74,7 +73,11 @@ public class GenerateUTR30DataFiles {
|
||||||
|
|
||||||
public static void main(String args[]) {
|
public static void main(String args[]) {
|
||||||
try {
|
try {
|
||||||
getNFKCDataFilesFromIcuProject();
|
if (args.length != 1) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"usage: " + GenerateUTR30DataFiles.class.getName() + " <releaseTag>");
|
||||||
|
}
|
||||||
|
getNFKCDataFilesFromIcuProject(args[0]);
|
||||||
expandRulesInUTR30DataFiles();
|
expandRulesInUTR30DataFiles();
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
t.printStackTrace(System.err);
|
t.printStackTrace(System.err);
|
||||||
|
@ -151,9 +154,9 @@ public class GenerateUTR30DataFiles {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void getNFKCDataFilesFromIcuProject() throws IOException {
|
private static void getNFKCDataFilesFromIcuProject(String releaseTag) throws IOException {
|
||||||
URL icuTagsURL = new URL(ICU_GIT_TAG_URL + "/");
|
URL icuTagsURL = new URL(ICU_GIT_TAG_URL + "/");
|
||||||
URL icuReleaseTagURL = new URL(icuTagsURL, ICU_RELEASE_TAG + "/");
|
URL icuReleaseTagURL = new URL(icuTagsURL, releaseTag + "/");
|
||||||
URL norm2url = new URL(icuReleaseTagURL, ICU_DATA_NORM2_PATH + "/");
|
URL norm2url = new URL(icuReleaseTagURL, ICU_DATA_NORM2_PATH + "/");
|
||||||
|
|
||||||
System.err.print("Downloading " + NFKC_TXT + " ... ");
|
System.err.print("Downloading " + NFKC_TXT + " ... ");
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
9ad0d915018dcbb394678a920d72f606cd1c7214
|
|
|
@ -0,0 +1 @@
|
||||||
|
76893e6000401ace133a65262254be0ebe556d46
|
|
@ -1 +0,0 @@
|
||||||
9ad0d915018dcbb394678a920d72f606cd1c7214
|
|
|
@ -0,0 +1 @@
|
||||||
|
76893e6000401ace133a65262254be0ebe556d46
|
|
@ -23,7 +23,7 @@ com.googlecode.juniversalchardet:juniversalchardet:1.0.3 (1 constraints: 0605f33
|
||||||
com.googlecode.mp4parser:isoparser:1.1.22 (1 constraints: 38052d3b)
|
com.googlecode.mp4parser:isoparser:1.1.22 (1 constraints: 38052d3b)
|
||||||
com.healthmarketscience.jackcess:jackcess:3.0.1 (1 constraints: 0605fb35)
|
com.healthmarketscience.jackcess:jackcess:3.0.1 (1 constraints: 0605fb35)
|
||||||
com.healthmarketscience.jackcess:jackcess-encrypt:3.0.0 (1 constraints: 0505fa35)
|
com.healthmarketscience.jackcess:jackcess-encrypt:3.0.0 (1 constraints: 0505fa35)
|
||||||
com.ibm.icu:icu4j:62.2 (1 constraints: de040d31)
|
com.ibm.icu:icu4j:68.2 (1 constraints: e4041f31)
|
||||||
com.jayway.jsonpath:json-path:2.4.0 (1 constraints: 08050136)
|
com.jayway.jsonpath:json-path:2.4.0 (1 constraints: 08050136)
|
||||||
com.lmax:disruptor:3.4.2 (1 constraints: 0b050836)
|
com.lmax:disruptor:3.4.2 (1 constraints: 0b050836)
|
||||||
com.pff:java-libpst:0.8.1 (1 constraints: 0b050436)
|
com.pff:java-libpst:0.8.1 (1 constraints: 0b050436)
|
||||||
|
|
|
@ -16,7 +16,7 @@ com.googlecode.juniversalchardet:juniversalchardet=1.0.3
|
||||||
com.googlecode.mp4parser:isoparser=1.1.22
|
com.googlecode.mp4parser:isoparser=1.1.22
|
||||||
com.healthmarketscience.jackcess:jackcess-encrypt=3.0.0
|
com.healthmarketscience.jackcess:jackcess-encrypt=3.0.0
|
||||||
com.healthmarketscience.jackcess:jackcess=3.0.1
|
com.healthmarketscience.jackcess:jackcess=3.0.1
|
||||||
com.ibm.icu:icu4j=62.2
|
com.ibm.icu:icu4j=68.2
|
||||||
com.jayway.jsonpath:json-path=2.4.0
|
com.jayway.jsonpath:json-path=2.4.0
|
||||||
com.lmax:disruptor=3.4.2
|
com.lmax:disruptor=3.4.2
|
||||||
com.pff:java-libpst=0.8.1
|
com.pff:java-libpst=0.8.1
|
||||||
|
|
Loading…
Reference in New Issue