* Bump %unicode 9 -> %unicode 12.1 for the 3 Unicode grammars * Regenerate emoji conformance tests for Unicode 12.1 * Modify word-break conformance tests to use emoji data (which replaces the old, crazy E_Base etc. properties) * Regenerate word-break conformance tests * Simplify grammar files and word-break conformance test generator, now that full-width numbers are WordBreak=Numeric * Use JFlex emoji properties rather than ICU-generated ones
import org.apache.tools.ant.taskdefs.condition.Os
import java.nio.file.Files
// Handle obtained from the scriptResources helper (defined outside this file) for this
// build script. NOTE(review): presumably the directory of resources that accompany the
// script; `resources` is not referenced in the visible part of this file — confirm it is
// still needed.
def resources = scriptResources(buildscript)
* Regenerates ICU-related data files.
* This build file contains regeneration code that uses both icu4j and icu4c.
* The icu4c version must exactly match the icu4j version in version.props;
* the version installed on your system is probably different, so this script will
* attempt to download and compile a matching icu4c version automatically (on Windows, prebuilt binaries are downloaded instead).
// Configure different icu4j dependencies on the root project, so the generation tasks
// in this script can look them up as rootProject.configurations.<name>.
configure(rootProject) {
configurations {
// Template for declaring an additional, version-pinned configuration:
// icu_xyz
dependencies {
// Template for binding such a configuration to an explicit icu4j version:
// icu_xyz "com.ibm.icu:icu4j:xyz"
// No version is spelled out for icu_current.
// NOTE(review): presumably it is supplied by the version recommendations
// (version.props) — confirm.
icu_current 'com.ibm.icu:icu4j'
// Exclude explicit ICU configs from palantir's version unification.
versionRecommendations {
// Template for excluding a pinned configuration from that unification:
// excludeConfigurations "icu_xyz"
// This retrieves the module name/ version for ICU from the given
// configuration (if present).
//
// The configuration is resolved, its first-level module dependencies are filtered to
// those whose group starts with "com.ibm.icu", and the module ids of the matches are
// joined with ", ". When nothing matches, the result is an empty string.
// The generation tasks in this script register the returned value as the "icuConfig"
// input property.
def icuVersionFromConfiguration(Configuration configuration) {
return configuration.resolvedConfiguration.getFirstLevelModuleDependencies({ dep -> dep.group.startsWith("com.ibm.icu") }).collect { dep -> dep.module.id }.join(", ")
// Regeneration tasks for the ICU analysis module. The statements below set up the
// locations and lazily resolved values shared by those tasks.
configure(project(":lucene:analysis:icu")) {
// Directory holding the UTR#30 source data files.
def utr30DataDir = file("src/data/utr30")
// Scratch directory under the build directory for the downloaded/compiled icu4c.
def icuBuildDir = file("${buildDir}/icu")
// Platform-dependent locations of the icu4c binaries; assigned just below.
def icuBinDir
def gennorm
def icupkg
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
// Windows: tools are taken from the bin64 folder of the ICU build directory.
icuBinDir = file("${icuBuildDir}/bin64")
gennorm = file("${icuBinDir}/gennorm2.exe")
icupkg = file("${icuBinDir}/icupkg.exe")
} else {
// Other platforms: tools are taken from the bin folder of the ICU source tree.
icuBinDir = file("${icuBuildDir}/icu/source/bin")
gennorm = file("${icuBinDir}/gennorm2")
icupkg = file("${icuBinDir}/icupkg")
// Resolve version lazily (can't resolve at configuration time).
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
// lazy gstring with ICU version: the embedded closure is only evaluated when the
// string is actually rendered, so the provider is not queried at this point.
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
// Name of the task that provides the icu4c tools on the current platform.
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
// Regenerates utr30.nrm from the *.txt sources in the UTR#30 data directory. The task
// action chains three invocations: the GenerateUTR30DataFiles Java tool, then the
// gennorm2 binary, then the icupkg binary.
task genUtr30DataFilesInternal() {
def icuConfig = rootProject.configurations.icu_current
// Requires the icu4j configuration and the platform-specific task that provides
// the icu4c tools (compileIcuWindows or compileIcuLinux).
dependsOn icuConfig
dependsOn icuCompileTask
// May be undefined yet, so defer the lookup by passing a closure.
dependsOn { sourceSets.tools.runtimeClasspath }
// gennorm generates file order-dependent output, so make it constant here
// by sorting the input files by name.
def inputFiles = fileTree(dir: utr30DataDir, include: "*.txt").asList().toSorted(Comparator.comparing { File f -> f.name })
def outputFile = file("src/resources/org/apache/lucene/analysis/icu/utr30.nrm")
// Declared inputs: the sorted source files plus the resolved ICU module id(s).
inputs.files inputFiles
inputs.property "icuConfig", provider { icuVersionFromConfiguration(icuConfig) }
outputs.file outputFile
doFirst {
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
// Step 1: run the generator tool from the "tools" source set, with the UTR#30 data
// directory as its working directory; a non-zero exit value fails the build.
project.javaexec {
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
classpath = sourceSets.tools.runtimeClasspath
ignoreExitValue false
workingDir utr30DataDir
args = [
// ICU release tag built from the icu4j version, with dots replaced by dashes.
"release-${icu4jVersion.replace(".", "-")}"
// Step 2: run the gennorm2 binary.
project.quietExec {
executable gennorm
args = [
// Names (without directory) of the input files, in the sorted order computed above.
*(inputFiles.collect { it.name })
// Step 3: run the icupkg binary.
project.quietExec {
executable icupkg
args = [
// Runs the RBBIRuleCompiler tool over the *.rbbi rule sources in src/data/uax29; the
// declared outputs are the *.brk files in the segmentation resources directory.
task genRbbiInternal() {
def icuConfig = rootProject.configurations.icu_current
dependsOn icuConfig
// May be undefined yet, so defer the lookup by passing a closure.
dependsOn { sourceSets.tools.runtimeClasspath }
def sourceDir = file("src/data/uax29")
def targetDir = file("src/resources/org/apache/lucene/analysis/icu/segmentation")
// Declared inputs: the rule sources plus the resolved ICU module id(s).
inputs.files fileTree(dir: sourceDir, include: "*.rbbi")
inputs.property "icuConfig", provider { icuVersionFromConfiguration(icuConfig) }
outputs.files fileTree(dir: targetDir, include: "*.brk")
doFirst {
// Run the compiler from the "tools" source set with JVM assertions enabled;
// a non-zero exit value fails the build.
project.javaexec {
main = "org.apache.lucene.analysis.icu.RBBIRuleCompiler"
classpath = sourceSets.tools.runtimeClasspath
ignoreExitValue false
enableAssertions true
// The tool receives the source directory and the target directory, in that order.
args = [ sourceDir, targetDir ]
// Hook both generation tasks into "regenerate" through wrapWithPersistentChecksums
// (defined outside this file).
// NOTE(review): presumably the wrapper skips regeneration while recorded checksums still
// match, and ignoreWithSource keeps the ICU compile task out of that check — confirm
// against the helper's definition.
regenerate.dependsOn wrapWithPersistentChecksums(genUtr30DataFilesInternal, [ ignoreWithSource: icuCompileTask ])
regenerate.dependsOn wrapWithPersistentChecksums(genRbbiInternal)
// Provides the icu4c tools on Windows: downloads the prebuilt Win64 (MSVC2019) icu4c
// archive for the current icu4j version from the ICU GitHub releases, unless the archive
// is already present, and unpacks it into the ICU build directory.
task compileIcuWindows() {
doFirst {
// Short alias for the lazily resolved icu4j version string.
def v = icu4jVersion
// The local archive name uses underscores in place of the dots in the version.
def icuBinZip = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}.zip")
// Download only when the archive does not exist yet.
if (!icuBinZip.exists()) {
// Download binaries matching icu4j version in version.props
// (release tag uses dashes, archive name uses underscores).
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-Win64-MSVC2019.zip")
logger.lifecycle("Trying to download binary ICU version: ${v} from:\n ${src}")
// The whole download is read into memory before being written to disk.
// NOTE(review): no visible step creates icuBuildDir before this write — confirm the
// directory exists at this point.
Files.write(icuBinZip.toPath(), src.toURL().openStream().bytes)
logger.lifecycle("Downloaded ${icuBinZip.size()} bytes.")
// Unzip.
project.copy {
into icuBuildDir
from zipTree(icuBinZip)
// Provides the icu4c tools on non-Windows platforms: downloads the icu4c source archive
// for the current icu4j version from the ICU GitHub releases (unless already present),
// extracts it with tar, builds it with a sh-driven step followed by make, and finally
// runs the built derb binary as a check.
task compileIcuLinux() {
doFirst {
// Fail fast if this task is run on Windows.
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
throw new GradleException("ICU compilation not supported on Windows.")
// Short alias for the lazily resolved icu4j version string.
def v = icu4jVersion
// The local archive name uses underscores in place of the dots in the version.
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}-src.tgz")
// Download sources for version matching icu4j version in version.props
if (!icuSrcTgz.exists()) {
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-src.tgz")
logger.lifecycle("Trying to download and compile ICU version: ${v} from:\n ${src}")
// The whole download is read into memory before being written to disk.
// NOTE(review): no visible step creates icuBuildDir before this write — confirm the
// directory exists at this point.
Files.write(icuSrcTgz.toPath(), src.toURL().openStream().bytes)
logger.lifecycle("Downloaded ${icuSrcTgz.size()} bytes.")
// Remove any previously extracted source tree.
def icuSrcDir = file("${icuBuildDir}/icu/source")
project.delete icuSrcDir
// Extract the tgz
project.quietExec {
executable "tar"
workingDir icuBuildDir
args = [
// Compile: (cd icu/source && ./configure --prefix=$(pwd) --enable-rpath && make -j4)
// Both C and C++ compiler flags are set to -O0 for this step.
// NOTE(review): presumably to shorten the build, since only the tools are needed — confirm.
project.quietExec {
executable "sh"
workingDir icuSrcDir
environment("CFLAGS", "-O0")
environment("CXXFLAGS", "-O0")
args = [
project.quietExec {
executable "make"
workingDir icuSrcDir
args = [
// Parallelism comes from the 'tests.jvms' property, with '4' as the default.
"-j${propertyOrDefault('tests.jvms', '4')}"
// Test that the binaries work: derb -V
logger.lifecycle("Compiled ICU, checking...")
project.quietExec {
executable "./derb"
workingDir icuBinDir
args = [
// Regenerates UnicodeProps.java
configure(project(":lucene:analysis:common")) {
// Runs groovy.lang.GroovyShell via javaexec, with icu4j and Groovy on the classpath;
// the declared output is UnicodeProps.java.
task generateUnicodePropsInternal() {
def icuConfig = rootProject.configurations.icu_current
def outputFile = file("src/java/org/apache/lucene/analysis/util/UnicodeProps.java")
description "Regenerate ${outputFile} (with ${icuConfig.name})"
group "generation"
dependsOn icuConfig
// Declared input: the resolved ICU module id(s).
inputs.property "icuConfig", provider { icuVersionFromConfiguration(icuConfig) }
outputs.file outputFile
doFirst {
project.javaexec {
main "groovy.lang.GroovyShell"
// Classpath: the icu4j configuration plus the root project's "groovy" configuration.
classpath icuConfig, rootProject.configurations.groovy
args = [
// GroovyShell is invoked with UTF-8 as its encoding.
"--encoding", "UTF-8",
// Hook the task into "regenerate" through wrapWithPersistentChecksums (defined outside
// this file), naming spotlessJava and spotlessJavaApply as andThenTasks.
// NOTE(review): presumably those tasks run afterwards to reformat the generated
// source — confirm against the helper's definition.
regenerate.dependsOn wrapWithPersistentChecksums(generateUnicodePropsInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])