diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 879d754c038..fe1be6e713a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -99,6 +99,8 @@ Other * LUCENE-7753: Make fields static when possible. (Daniel Jelinski via Adrien Grand) +* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi) + ======================= Lucene 6.7.0 ======================= Other diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java index 75070d10700..00ee311a4b0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java @@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet; /** * This file contains unicode properties used by various {@link CharTokenizer}s. - * The data was created using ICU4J v56.1.0.0 + * The data was created using ICU4J v59.1.0.0 *

- * Unicode version: 8.0.0.0 + * Unicode version: 9.0.0.0 */ public final class UnicodeProps { private UnicodeProps() {} /** Unicode version that was used to generate this file: {@value} */ - public static final String UNICODE_VERSION = "8.0.0.0"; + public static final String UNICODE_VERSION = "9.0.0.0"; /** Bitset with Unicode WHITESPACE code points. */ public static final Bits WHITESPACE = createBits( diff --git a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt index 3772daf1aeb..eb5b78e0ea2 100644 --- a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt @@ -168,11 +168,14 @@ FFE3> 1134D> 11366..1136C> 11370..11374> +11442> +11446> 114C2..114C3> 115BF..115C0> 1163F> 116B6..116B7> 1172B> +11C3F> 16AF0..16AF4> 16F8F..16F9F> 1D167..1D169> @@ -181,6 +184,8 @@ FFE3> 1D185..1D18B> 1D1AA..1D1AD> 1E8D0..1E8D6> +1E944..1E946> +1E948..1E94A> # Latin script "composed" that do not further decompose, so decompose here # These are from AsciiFoldingFilter diff --git a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt index 62e6aefdf1c..fb8cf1ac66b 100644 --- a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt @@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 112F7>0037 # KHUDAWADI DIGIT SEVEN 112F8>0038 # KHUDAWADI DIGIT EIGHT 112F9>0039 # KHUDAWADI DIGIT NINE +11450>0030 # NEWA DIGIT ZERO +11451>0031 # NEWA DIGIT ONE +11452>0032 # NEWA DIGIT TWO +11453>0033 # NEWA DIGIT THREE +11454>0034 # NEWA DIGIT FOUR +11455>0035 # NEWA DIGIT FIVE +11456>0036 # NEWA DIGIT SIX +11457>0037 # NEWA DIGIT SEVEN +11458>0038 # NEWA DIGIT EIGHT +11459>0039 # NEWA DIGIT NINE 114D0>0030 # TIRHUTA DIGIT ZERO 114D1>0031 # TIRHUTA DIGIT ONE 114D2>0032 # TIRHUTA DIGIT TWO @@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 118E7>0037 # WARANG CITI DIGIT SEVEN 118E8>0038 # WARANG CITI DIGIT EIGHT 118E9>0039 # WARANG CITI DIGIT NINE +11C50>0030 # BHAIKSUKI DIGIT ZERO +11C51>0031 # BHAIKSUKI DIGIT ONE +11C52>0032 # BHAIKSUKI DIGIT TWO +11C53>0033 # BHAIKSUKI DIGIT THREE +11C54>0034 # BHAIKSUKI DIGIT FOUR +11C55>0035 # BHAIKSUKI DIGIT FIVE +11C56>0036 # BHAIKSUKI DIGIT SIX +11C57>0037 # BHAIKSUKI DIGIT SEVEN +11C58>0038 # BHAIKSUKI DIGIT EIGHT +11C59>0039 # BHAIKSUKI DIGIT NINE 16A60>0030 # MRO DIGIT ZERO 16A61>0031 # MRO DIGIT ONE 16A62>0032 # MRO DIGIT TWO @@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 16B57>0037 # PAHAWH HMONG DIGIT SEVEN 16B58>0038 # PAHAWH HMONG DIGIT EIGHT 16B59>0039 # PAHAWH HMONG DIGIT NINE +1E950>0030 # ADLAM DIGIT ZERO +1E951>0031 # ADLAM DIGIT ONE +1E952>0032 # ADLAM DIGIT TWO +1E953>0033 # ADLAM DIGIT THREE +1E954>0034 # ADLAM DIGIT FOUR +1E955>0035 # ADLAM DIGIT FIVE +1E956>0036 # ADLAM DIGIT SIX +1E957>0037 # ADLAM DIGIT SEVEN +1E958>0038 # ADLAM DIGIT EIGHT +1E959>0039 # ADLAM DIGIT NINE diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt index 5b7374f2cd5..5f9b1821760 100644 --- a/lucene/analysis/icu/src/data/utr30/nfc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2014, International Business Machines +# Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfc.txt @@ -7,7 +7,7 @@ # # Complete data for Unicode NFC normalization. -* Unicode 7.0.0 +* Unicode 9.0.0 # Canonical_Combining_Class (ccc) values 0300..0314:230 @@ -129,6 +129,8 @@ 0825..0827:230 0829..082D:230 0859..085B:220 +08D4..08E1:230 +08E3:220 08E4..08E5:230 08E6:220 08E7..08E8:230 @@ -232,6 +234,7 @@ 1DCF:220 1DD0:202 1DD1..1DF5:230 +1DFB:230 1DFC:233 1DFD:220 1DFE:230 @@ -260,7 +263,7 @@ 3099..309A:8 A66F:230 A674..A67D:230 -A69F:230 +A69E..A69F:230 A6F0..A6F1:230 A806:9 A8C4:9 @@ -280,6 +283,7 @@ ABED:9 FB1E:26 FE20..FE26:230 FE27..FE2D:220 +FE2E..FE2F:230 101FD:220 102E0:220 10376..1037A:230 @@ -299,6 +303,7 @@ FE27..FE2D:220 11133..11134:9 11173:7 111C0:9 +111CA:7 11235:9 11236:7 112E9:7 @@ -307,6 +312,8 @@ FE27..FE2D:220 1134D:9 11366..1136C:230 11370..11374:230 +11442:9 +11446:7 114C2:9 114C3:7 115BF:9 @@ -314,6 +321,8 @@ FE27..FE2D:220 1163F:9 116B6:9 116B7:7 +1172B:9 +11C3F:9 16AF0..16AF4:1 16B30..16B36:230 1BC9E:1 @@ -326,7 +335,14 @@ FE27..FE2D:220 1D18A..1D18B:220 1D1AA..1D1AD:230 1D242..1D244:230 +1E000..1E006:230 +1E008..1E018:230 +1E01B..1E021:230 +1E023..1E024:230 +1E026..1E02A:230 1E8D0..1E8D6:220 +1E944..1E949:230 +1E94A:7 # Canonical decomposition mappings 00C0>0041 0300 # one-way: diacritic 0300 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt index fea41298bc0..f51fa5db4b7 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2014, International Business Machines +# Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfkc.txt @@ -11,7 +11,7 @@ # to NFKC one-way mappings. # Use this file as the second gennorm2 input file after nfc.txt. -* Unicode 7.0.0 +* Unicode 9.0.0 00A0>0020 00A8>0020 0308 @@ -3675,6 +3675,7 @@ FFEE>25CB 1F238>7533 1F239>5272 1F23A>55B6 +1F23B>914D 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt index b24b4b277fa..7f33df58c84 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt @@ -1,5 +1,5 @@ # Unicode Character Database -# Copyright (c) 1991-2014 Unicode, Inc. +# Copyright (c) 1991-2016 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -12,7 +12,7 @@ # and reformatted into syntax for the gennorm2 Normalizer2 data generator tool. # Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt. -* Unicode 7.0.0 +* Unicode 9.0.0 0041>0061 0042>0062 @@ -632,8 +632,22 @@ 10CD>2D2D 10FC>10DC 115F..1160> +13F8>13F0 +13F9>13F1 +13FA>13F2 +13FB>13F3 +13FC>13F4 +13FD>13F5 17B4..17B5> 180B..180E> +1C80>0432 +1C81>0434 +1C82>043E +1C83>0441 +1C84..1C85>0442 +1C86>044A +1C87>0463 +1C88>A64B 1D2C>0061 1D2D>00E6 1D2E>0062 @@ -2382,14 +2396,99 @@ A7AA>0266 A7AB>025C A7AC>0261 A7AD>026C +A7AE>026A A7B0>029E A7B1>0287 +A7B2>029D +A7B3>AB53 +A7B4>A7B5 +A7B6>A7B7 A7F8>0127 A7F9>0153 AB5C>A727 AB5D>AB37 AB5E>026B AB5F>AB52 +AB70>13A0 +AB71>13A1 +AB72>13A2 +AB73>13A3 +AB74>13A4 +AB75>13A5 +AB76>13A6 +AB77>13A7 +AB78>13A8 +AB79>13A9 +AB7A>13AA +AB7B>13AB +AB7C>13AC +AB7D>13AD +AB7E>13AE +AB7F>13AF +AB80>13B0 +AB81>13B1 +AB82>13B2 +AB83>13B3 +AB84>13B4 +AB85>13B5 +AB86>13B6 +AB87>13B7 +AB88>13B8 +AB89>13B9 +AB8A>13BA +AB8B>13BB +AB8C>13BC +AB8D>13BD +AB8E>13BE +AB8F>13BF +AB90>13C0 +AB91>13C1 +AB92>13C2 +AB93>13C3 +AB94>13C4 +AB95>13C5 +AB96>13C6 +AB97>13C7 +AB98>13C8 +AB99>13C9 +AB9A>13CA +AB9B>13CB +AB9C>13CC +AB9D>13CD +AB9E>13CE +AB9F>13CF +ABA0>13D0 +ABA1>13D1 +ABA2>13D2 +ABA3>13D3 +ABA4>13D4 +ABA5>13D5 +ABA6>13D6 +ABA7>13D7 +ABA8>13D8 +ABA9>13D9 +ABAA>13DA +ABAB>13DB +ABAC>13DC +ABAD>13DD +ABAE>13DE +ABAF>13DF +ABB0>13E0 +ABB1>13E1 +ABB2>13E2 +ABB3>13E3 +ABB4>13E4 +ABB5>13E5 +ABB6>13E6 +ABB7>13E7 +ABB8>13E8 +ABB9>13E9 +ABBA>13EA +ABBB>13EB +ABBC>13EC +ABBD>13ED +ABBE>13EE +ABBF>13EF F900>8C48 F901>66F4 F902>8ECA @@ -3766,6 +3865,93 @@ FFF0..FFF8> 10425>1044D 10426>1044E 10427>1044F +104B0>104D8 +104B1>104D9 +104B2>104DA +104B3>104DB +104B4>104DC +104B5>104DD +104B6>104DE +104B7>104DF +104B8>104E0 +104B9>104E1 +104BA>104E2 +104BB>104E3 +104BC>104E4 +104BD>104E5 +104BE>104E6 +104BF>104E7 +104C0>104E8 +104C1>104E9 +104C2>104EA +104C3>104EB +104C4>104EC +104C5>104ED +104C6>104EE +104C7>104EF +104C8>104F0 +104C9>104F1 +104CA>104F2 +104CB>104F3 +104CC>104F4 +104CD>104F5 +104CE>104F6 +104CF>104F7 +104D0>104F8 +104D1>104F9 +104D2>104FA +104D3>104FB +10C80>10CC0 +10C81>10CC1 +10C82>10CC2 +10C83>10CC3 +10C84>10CC4 +10C85>10CC5 +10C86>10CC6 +10C87>10CC7 +10C88>10CC8 +10C89>10CC9 +10C8A>10CCA +10C8B>10CCB +10C8C>10CCC +10C8D>10CCD +10C8E>10CCE +10C8F>10CCF +10C90>10CD0 +10C91>10CD1 +10C92>10CD2 +10C93>10CD3 +10C94>10CD4 +10C95>10CD5 +10C96>10CD6 +10C97>10CD7 +10C98>10CD8 +10C99>10CD9 +10C9A>10CDA +10C9B>10CDB +10C9C>10CDC +10C9D>10CDD +10C9E>10CDE +10C9F>10CDF +10CA0>10CE0 +10CA1>10CE1 +10CA2>10CE2 +10CA3>10CE3 +10CA4>10CE4 +10CA5>10CE5 +10CA6>10CE6 +10CA7>10CE7 +10CA8>10CE8 +10CA9>10CE9 +10CAA>10CEA +10CAB>10CEB +10CAC>10CEC +10CAD>10CED +10CAE>10CEE +10CAF>10CEF +10CB0>10CF0 +10CB1>10CF1 +10CB2>10CF2 118A0>118C0 118A1>118C1 118A2>118C2 @@ -4803,6 +4989,40 @@ FFF0..FFF8> 1D7FD>0037 1D7FE>0038 1D7FF>0039 +1E900>1E922 +1E901>1E923 +1E902>1E924 +1E903>1E925 +1E904>1E926 +1E905>1E927 +1E906>1E928 +1E907>1E929 +1E908>1E92A +1E909>1E92B +1E90A>1E92C +1E90B>1E92D +1E90C>1E92E +1E90D>1E92F +1E90E>1E930 +1E90F>1E931 +1E910>1E932 +1E911>1E933 +1E912>1E934 +1E913>1E935 +1E914>1E936 +1E915>1E937 +1E916>1E938 +1E917>1E939 +1E918>1E93A +1E919>1E93B +1E91A>1E93C +1E91B>1E93D +1E91C>1E93E +1E91D>1E93F +1E91E>1E940 +1E91F>1E941 +1E920>1E942 +1E921>1E943 1EE00>0627 1EE01>0628 1EE02>062C @@ -5067,6 +5287,7 @@ FFF0..FFF8> 1F238>7533 1F239>5272 1F23A>55B6 +1F23B>914D 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk index 5b8479751d5..c94a023c2ce 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk index 41b977b259a..c3357efa7ce 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm index 2680264f181..1a16f3eb182 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java index 035a3a086b2..0f2bffecfb0 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java @@ -63,7 +63,7 @@ import java.util.regex.Pattern; public class GenerateUTR30DataFiles { private static final String ICU_SVN_TAG_URL = "http://source.icu-project.org/repos/icu/icu/tags"; - private static final String ICU_RELEASE_TAG = "release-54-1"; + private static final String ICU_RELEASE_TAG = "release-58-1"; private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2"; private static final String NFC_TXT = "nfc.txt"; private static final String NFKC_TXT = "nfkc.txt"; diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 7e0e7c72f58..3c45b2cd35b 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 /com.googlecode.mp4parser/isoparser = 1.1.18 /com.healthmarketscience.jackcess/jackcess = 2.1.3 -/com.ibm.icu/icu4j = 56.1 +/com.ibm.icu/icu4j = 59.1 /com.pff/java-libpst = 0.8.1 com.sun.jersey.version = 1.9 diff --git a/lucene/licenses/icu4j-56.1.jar.sha1 b/lucene/licenses/icu4j-56.1.jar.sha1 deleted file mode 100644 index 5f8e0466fde..00000000000 --- a/lucene/licenses/icu4j-56.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8dd6671f52165a0419e6de5e1016400875a90fa9 diff --git a/lucene/licenses/icu4j-59.1.jar.sha1 b/lucene/licenses/icu4j-59.1.jar.sha1 new file mode 100644 index 00000000000..f3f0018f053 --- /dev/null +++ b/lucene/licenses/icu4j-59.1.jar.sha1 @@ -0,0 +1 @@ +6f06e820cf4c8968bbbaae66ae0b33f6a256b57f diff --git a/solr/licenses/icu4j-56.1.jar.sha1 b/solr/licenses/icu4j-56.1.jar.sha1 deleted file mode 100644 index 5f8e0466fde..00000000000 --- a/solr/licenses/icu4j-56.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8dd6671f52165a0419e6de5e1016400875a90fa9 diff --git a/solr/licenses/icu4j-59.1.jar.sha1 b/solr/licenses/icu4j-59.1.jar.sha1 new file mode 100644 index 00000000000..f3f0018f053 --- /dev/null +++ b/solr/licenses/icu4j-59.1.jar.sha1 @@ -0,0 +1 @@ +6f06e820cf4c8968bbbaae66ae0b33f6a256b57f