LUCENE-7540: Upgrade ICU to 59.1

This commit is contained in:
Jim Ferenczi 2017-05-24 18:11:10 +02:00
parent fed7343d2e
commit cd567b985a
16 changed files with 289 additions and 14 deletions

View File

@ -99,6 +99,8 @@ Other
* LUCENE-7753: Make fields static when possible.
(Daniel Jelinski via Adrien Grand)
* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi)
======================= Lucene 6.7.0 =======================
Other

View File

@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet;
/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
* The data was created using ICU4J v56.1.0.0
* The data was created using ICU4J v59.1.0.0
* <p>
* Unicode version: 8.0.0.0
* Unicode version: 9.0.0.0
*/
public final class UnicodeProps {
private UnicodeProps() {}
/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "8.0.0.0";
public static final String UNICODE_VERSION = "9.0.0.0";
/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(

View File

@ -168,11 +168,14 @@ FFE3>
1134D>
11366..1136C>
11370..11374>
11442>
11446>
114C2..114C3>
115BF..115C0>
1163F>
116B6..116B7>
1172B>
11C3F>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
@ -181,6 +184,8 @@ FFE3>
1D185..1D18B>
1D1AA..1D1AD>
1E8D0..1E8D6>
1E944..1E946>
1E948..1E94A>
# Latin script "composed" that do not further decompose, so decompose here
# These are from AsciiFoldingFilter

View File

@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
112F7>0037 # KHUDAWADI DIGIT SEVEN
112F8>0038 # KHUDAWADI DIGIT EIGHT
112F9>0039 # KHUDAWADI DIGIT NINE
11450>0030 # NEWA DIGIT ZERO
11451>0031 # NEWA DIGIT ONE
11452>0032 # NEWA DIGIT TWO
11453>0033 # NEWA DIGIT THREE
11454>0034 # NEWA DIGIT FOUR
11455>0035 # NEWA DIGIT FIVE
11456>0036 # NEWA DIGIT SIX
11457>0037 # NEWA DIGIT SEVEN
11458>0038 # NEWA DIGIT EIGHT
11459>0039 # NEWA DIGIT NINE
114D0>0030 # TIRHUTA DIGIT ZERO
114D1>0031 # TIRHUTA DIGIT ONE
114D2>0032 # TIRHUTA DIGIT TWO
@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
118E7>0037 # WARANG CITI DIGIT SEVEN
118E8>0038 # WARANG CITI DIGIT EIGHT
118E9>0039 # WARANG CITI DIGIT NINE
11C50>0030 # BHAIKSUKI DIGIT ZERO
11C51>0031 # BHAIKSUKI DIGIT ONE
11C52>0032 # BHAIKSUKI DIGIT TWO
11C53>0033 # BHAIKSUKI DIGIT THREE
11C54>0034 # BHAIKSUKI DIGIT FOUR
11C55>0035 # BHAIKSUKI DIGIT FIVE
11C56>0036 # BHAIKSUKI DIGIT SIX
11C57>0037 # BHAIKSUKI DIGIT SEVEN
11C58>0038 # BHAIKSUKI DIGIT EIGHT
11C59>0039 # BHAIKSUKI DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
16B59>0039 # PAHAWH HMONG DIGIT NINE
1E950>0030 # ADLAM DIGIT ZERO
1E951>0031 # ADLAM DIGIT ONE
1E952>0032 # ADLAM DIGIT TWO
1E953>0033 # ADLAM DIGIT THREE
1E954>0034 # ADLAM DIGIT FOUR
1E955>0035 # ADLAM DIGIT FIVE
1E956>0036 # ADLAM DIGIT SIX
1E957>0037 # ADLAM DIGIT SEVEN
1E958>0038 # ADLAM DIGIT EIGHT
1E959>0039 # ADLAM DIGIT NINE

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt
@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.
* Unicode 7.0.0
* Unicode 9.0.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
@ -129,6 +129,8 @@
0825..0827:230
0829..082D:230
0859..085B:220
08D4..08E1:230
08E3:220
08E4..08E5:230
08E6:220
08E7..08E8:230
@ -232,6 +234,7 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
1DFB:230
1DFC:233
1DFD:220
1DFE:230
@ -260,7 +263,7 @@
3099..309A:8
A66F:230
A674..A67D:230
A69F:230
A69E..A69F:230
A6F0..A6F1:230
A806:9
A8C4:9
@ -280,6 +283,7 @@ ABED:9
FB1E:26
FE20..FE26:230
FE27..FE2D:220
FE2E..FE2F:230
101FD:220
102E0:220
10376..1037A:230
@ -299,6 +303,7 @@ FE27..FE2D:220
11133..11134:9
11173:7
111C0:9
111CA:7
11235:9
11236:7
112E9:7
@ -307,6 +312,8 @@ FE27..FE2D:220
1134D:9
11366..1136C:230
11370..11374:230
11442:9
11446:7
114C2:9
114C3:7
115BF:9
@ -314,6 +321,8 @@ FE27..FE2D:220
1163F:9
116B6:9
116B7:7
1172B:9
11C3F:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
@ -326,7 +335,14 @@ FE27..FE2D:220
1D18A..1D18B:220
1D1AA..1D1AD:230
1D242..1D244:230
1E000..1E006:230
1E008..1E018:230
1E01B..1E021:230
1E023..1E024:230
1E026..1E02A:230
1E8D0..1E8D6:220
1E944..1E949:230
1E94A:7
# Canonical decomposition mappings
00C0>0041 0300 # one-way: diacritic 0300

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt
@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
* Unicode 7.0.0
* Unicode 9.0.0
00A0>0020
00A8>0020 0308
@ -3675,6 +3675,7 @@ FFEE>25CB
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View File

@ -1,5 +1,5 @@
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# Copyright (c) 1991-2016 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
* Unicode 7.0.0
* Unicode 9.0.0
0041>0061
0042>0062
@ -632,8 +632,22 @@
10CD>2D2D
10FC>10DC
115F..1160>
13F8>13F0
13F9>13F1
13FA>13F2
13FB>13F3
13FC>13F4
13FD>13F5
17B4..17B5>
180B..180E>
1C80>0432
1C81>0434
1C82>043E
1C83>0441
1C84..1C85>0442
1C86>044A
1C87>0463
1C88>A64B
1D2C>0061
1D2D>00E6
1D2E>0062
@ -2382,14 +2396,99 @@ A7AA>0266
A7AB>025C
A7AC>0261
A7AD>026C
A7AE>026A
A7B0>029E
A7B1>0287
A7B2>029D
A7B3>AB53
A7B4>A7B5
A7B6>A7B7
A7F8>0127
A7F9>0153
AB5C>A727
AB5D>AB37
AB5E>026B
AB5F>AB52
AB70>13A0
AB71>13A1
AB72>13A2
AB73>13A3
AB74>13A4
AB75>13A5
AB76>13A6
AB77>13A7
AB78>13A8
AB79>13A9
AB7A>13AA
AB7B>13AB
AB7C>13AC
AB7D>13AD
AB7E>13AE
AB7F>13AF
AB80>13B0
AB81>13B1
AB82>13B2
AB83>13B3
AB84>13B4
AB85>13B5
AB86>13B6
AB87>13B7
AB88>13B8
AB89>13B9
AB8A>13BA
AB8B>13BB
AB8C>13BC
AB8D>13BD
AB8E>13BE
AB8F>13BF
AB90>13C0
AB91>13C1
AB92>13C2
AB93>13C3
AB94>13C4
AB95>13C5
AB96>13C6
AB97>13C7
AB98>13C8
AB99>13C9
AB9A>13CA
AB9B>13CB
AB9C>13CC
AB9D>13CD
AB9E>13CE
AB9F>13CF
ABA0>13D0
ABA1>13D1
ABA2>13D2
ABA3>13D3
ABA4>13D4
ABA5>13D5
ABA6>13D6
ABA7>13D7
ABA8>13D8
ABA9>13D9
ABAA>13DA
ABAB>13DB
ABAC>13DC
ABAD>13DD
ABAE>13DE
ABAF>13DF
ABB0>13E0
ABB1>13E1
ABB2>13E2
ABB3>13E3
ABB4>13E4
ABB5>13E5
ABB6>13E6
ABB7>13E7
ABB8>13E8
ABB9>13E9
ABBA>13EA
ABBB>13EB
ABBC>13EC
ABBD>13ED
ABBE>13EE
ABBF>13EF
F900>8C48
F901>66F4
F902>8ECA
@ -3766,6 +3865,93 @@ FFF0..FFF8>
10425>1044D
10426>1044E
10427>1044F
104B0>104D8
104B1>104D9
104B2>104DA
104B3>104DB
104B4>104DC
104B5>104DD
104B6>104DE
104B7>104DF
104B8>104E0
104B9>104E1
104BA>104E2
104BB>104E3
104BC>104E4
104BD>104E5
104BE>104E6
104BF>104E7
104C0>104E8
104C1>104E9
104C2>104EA
104C3>104EB
104C4>104EC
104C5>104ED
104C6>104EE
104C7>104EF
104C8>104F0
104C9>104F1
104CA>104F2
104CB>104F3
104CC>104F4
104CD>104F5
104CE>104F6
104CF>104F7
104D0>104F8
104D1>104F9
104D2>104FA
104D3>104FB
10C80>10CC0
10C81>10CC1
10C82>10CC2
10C83>10CC3
10C84>10CC4
10C85>10CC5
10C86>10CC6
10C87>10CC7
10C88>10CC8
10C89>10CC9
10C8A>10CCA
10C8B>10CCB
10C8C>10CCC
10C8D>10CCD
10C8E>10CCE
10C8F>10CCF
10C90>10CD0
10C91>10CD1
10C92>10CD2
10C93>10CD3
10C94>10CD4
10C95>10CD5
10C96>10CD6
10C97>10CD7
10C98>10CD8
10C99>10CD9
10C9A>10CDA
10C9B>10CDB
10C9C>10CDC
10C9D>10CDD
10C9E>10CDE
10C9F>10CDF
10CA0>10CE0
10CA1>10CE1
10CA2>10CE2
10CA3>10CE3
10CA4>10CE4
10CA5>10CE5
10CA6>10CE6
10CA7>10CE7
10CA8>10CE8
10CA9>10CE9
10CAA>10CEA
10CAB>10CEB
10CAC>10CEC
10CAD>10CED
10CAE>10CEE
10CAF>10CEF
10CB0>10CF0
10CB1>10CF1
10CB2>10CF2
118A0>118C0
118A1>118C1
118A2>118C2
@ -4803,6 +4989,40 @@ FFF0..FFF8>
1D7FD>0037
1D7FE>0038
1D7FF>0039
1E900>1E922
1E901>1E923
1E902>1E924
1E903>1E925
1E904>1E926
1E905>1E927
1E906>1E928
1E907>1E929
1E908>1E92A
1E909>1E92B
1E90A>1E92C
1E90B>1E92D
1E90C>1E92E
1E90D>1E92F
1E90E>1E930
1E90F>1E931
1E910>1E932
1E911>1E933
1E912>1E934
1E913>1E935
1E914>1E936
1E915>1E937
1E916>1E938
1E917>1E939
1E918>1E93A
1E919>1E93B
1E91A>1E93C
1E91B>1E93D
1E91C>1E93E
1E91D>1E93F
1E91E>1E940
1E91F>1E941
1E920>1E942
1E921>1E943
1EE00>0627
1EE01>0628
1EE02>062C
@ -5067,6 +5287,7 @@ FFF0..FFF8>
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View File

@ -63,7 +63,7 @@ import java.util.regex.Pattern;
public class GenerateUTR30DataFiles {
private static final String ICU_SVN_TAG_URL
= "http://source.icu-project.org/repos/icu/icu/tags";
private static final String ICU_RELEASE_TAG = "release-54-1";
private static final String ICU_RELEASE_TAG = "release-58-1";
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
private static final String NFC_TXT = "nfc.txt";
private static final String NFKC_TXT = "nfkc.txt";

View File

@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.1.18
/com.healthmarketscience.jackcess/jackcess = 2.1.3
/com.ibm.icu/icu4j = 56.1
/com.ibm.icu/icu4j = 59.1
/com.pff/java-libpst = 0.8.1
com.sun.jersey.version = 1.9

View File

@ -1 +0,0 @@
8dd6671f52165a0419e6de5e1016400875a90fa9

View File

@ -0,0 +1 @@
6f06e820cf4c8968bbbaae66ae0b33f6a256b57f

View File

@ -1 +0,0 @@
8dd6671f52165a0419e6de5e1016400875a90fa9

View File

@ -0,0 +1 @@
6f06e820cf4c8968bbbaae66ae0b33f6a256b57f