Compiling the ICU library is slow, so disable compiler optimization: it doesn't speed up our usage of the gennorm2 tool. Use a better heuristic for make parallelism (tests.jvms rather than a hardcoded value of four).
import org.apache.tools.ant.taskdefs.condition.Os
import java.nio.file.Files
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
// NOTE(review): scriptResources is defined outside this file; presumably it resolves the
// resource location associated with this build script - confirm against its definition.
def resources = scriptResources(buildscript)
* Regenerates ICU-related data files.
* This build file contains regeneration code utilizing both icu4j and icu4c.
* The icu4c version must match exactly the icu4j version in version.props:
* The one on your system is probably different. This script will attempt to
* download and compile a matching icu4c version automatically.
// Configure different icu4j dependencies. Two explicit icu4j releases (62.2 and 68.2) are
// pinned on the root project, each under its own configuration name (icu_62, icu_68), so
// that the generator tasks further down can select a specific ICU release.
configure(rootProject) {
configurations {
dependencies {
icu_62 "com.ibm.icu:icu4j:62.2"
icu_68 "com.ibm.icu:icu4j:68.2"
// Exclude ICU config from palantir's version unification.
// The versions above are given explicitly per configuration.
versionRecommendations {
excludeConfigurations "icu_68", "icu_62"
// Regeneration tasks for the ICU analysis module (utr30.nrm and the *.brk rule files).
configure(project(":lucene:analysis:icu")) {
// Source directory holding the *.txt inputs used to produce utr30.nrm.
def utr30DataDir = file("src/data/utr30")
// Scratch directory under the build dir where icu4c is downloaded and unpacked/compiled.
def icuBuildDir = file("${buildDir}/icu")
// Locations of the icu4c command-line tools; assigned below because the layout is OS-specific.
def icuBinDir
def gennorm
def icupkg
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
// Windows: tools live in bin64 and carry an .exe suffix.
icuBinDir = file("${icuBuildDir}/bin64")
gennorm = file("${icuBinDir}/gennorm2.exe")
icupkg = file("${icuBinDir}/icupkg.exe")
} else {
// Other systems: tools live in the bin directory of the icu4c source tree.
icuBinDir = file("${icuBuildDir}/icu/source/bin")
gennorm = file("${icuBinDir}/gennorm2")
icupkg = file("${icuBinDir}/icupkg")
// Resolve version lazily (can't resolve at configuration time).
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
// lazy gstring with ICU version.
// The embedded closure is evaluated each time the string is rendered, not here.
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
// Name of the task that makes the icu4c tools available on the current OS.
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
// Regenerates utr30.nrm from the *.txt files in the utr30 data directory. Requires the
// icu4c tools (gennorm2, icupkg), hence the dependency on the OS-specific ICU compile task.
task genUtr30DataFilesInternal() {
dependsOn icuCompileTask
// May be undefined yet, so use a provider.
dependsOn { sourceSets.tools.runtimeClasspath }
// gennorm generates file order-dependent output, so make it constant here.
// (Files are sorted by file name.)
def inputFiles = fileTree(dir: utr30DataDir, include: "*.txt").asList().toSorted(Comparator.comparing { File f -> f.name })
def outputFile = file("src/resources/org/apache/lucene/analysis/icu/utr30.nrm")
// Up-to-date inputs: the data files plus the icu4j version; output: the single .nrm file.
inputs.files inputFiles
inputs.property "icu4j", icu4jVersionProvider
outputs.file outputFile
doFirst {
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
// Step 1: run the GenerateUTR30DataFiles tool inside the utr30 data directory, passing
// the ICU release tag (dots in the version become dashes, e.g. "release-68-2" for 68.2).
project.javaexec {
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
classpath = sourceSets.tools.runtimeClasspath
ignoreExitValue false
workingDir utr30DataDir
args = [
"release-${icu4jVersion.replace(".", "-")}"
// Step 2: run gennorm2; the sorted input file names are passed as arguments.
project.quietExec {
executable gennorm
args = [
*(inputFiles.collect { it.name })
// Step 3: run icupkg as the final stage of the pipeline.
project.quietExec {
executable icupkg
args = [
// Runs RBBIRuleCompiler over the *.rbbi rule sources in src/data/uax29; the *.brk files in
// the segmentation resources directory are declared as this task's outputs.
task genRbbiInternal() {
// May be undefined yet, so use a provider.
dependsOn { sourceSets.tools.runtimeClasspath }
def sourceDir = file("src/data/uax29")
def targetDir = file("src/resources/org/apache/lucene/analysis/icu/segmentation")
// Up-to-date inputs: the rule sources plus the icu4j version; outputs: the compiled rules.
inputs.files fileTree(dir: sourceDir, include: "*.rbbi")
inputs.property "icu4j", icu4jVersionProvider
outputs.files fileTree(dir: targetDir, include: "*.brk")
doFirst {
// The compiler runs in a separate JVM with assertions enabled; a non-zero exit fails the task.
project.javaexec {
main = "org.apache.lucene.analysis.icu.RBBIRuleCompiler"
classpath = sourceSets.tools.runtimeClasspath
ignoreExitValue false
enableAssertions true
// Arguments: source directory first, then target directory.
args = [ sourceDir, targetDir ]
// Hook both generators into the "regenerate" task through wrapWithPersistentChecksums.
// NOTE(review): wrapWithPersistentChecksums is defined elsewhere; presumably it skips a
// generator whose inputs/outputs are unchanged, and "ignoreWithSource" also skips the ICU
// compile task in that case - confirm against its definition.
regenerate.dependsOn wrapWithPersistentChecksums(genUtr30DataFilesInternal, [ ignoreWithSource: icuCompileTask ])
regenerate.dependsOn wrapWithPersistentChecksums(genRbbiInternal)
// Provides the icu4c tools on Windows. Despite the name, nothing is compiled here: the
// prebuilt Win64 (MSVC2019) binary zip for the release matching icu4j is downloaded and
// unpacked into icuBuildDir.
task compileIcuWindows() {
doFirst {
// Render the lazy version string once for this execution.
def v = icu4jVersion
// Local zip name uses underscores in place of dots in the version.
def icuBinZip = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}.zip")
// An already-downloaded zip is reused.
if (!icuBinZip.exists()) {
// Download binaries matching icu4j version in version.props
// (release tag uses dashes, file name uses underscores in place of dots).
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-Win64-MSVC2019.zip")
logger.lifecycle("Trying to download binary ICU version: ${v} from:\n ${src}")
// The whole download is read into a byte array in memory before being written to disk.
Files.write(icuBinZip.toPath(), src.toURL().openStream().bytes)
logger.lifecycle("Downloaded ${icuBinZip.size()} bytes.")
// Unzip.
project.copy {
into icuBuildDir
from zipTree(icuBinZip)
// Builds the icu4c command-line tools from source on non-Windows systems: downloads the
// source tarball for the release matching icu4j, extracts it, runs configure and make,
// then checks that one of the resulting binaries can be executed.
task compileIcuLinux() {
doFirst {
// Fail fast on Windows; the steps below invoke tar, sh and make.
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
throw new GradleException("ICU compilation not supported on Windows.")
// Render the lazy version string once for this execution.
def v = icu4jVersion
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}-src.tgz")
// Download sources for version matching icu4j version in version.props
// (an already-downloaded tarball is reused).
if (!icuSrcTgz.exists()) {
def src = URI.create("https://github.com/unicode-org/icu/releases/download/release-${v.replace(".", "-")}/icu4c-${v.replace(".", "_")}-src.tgz")
logger.lifecycle("Trying to download and compile ICU version: ${v} from:\n ${src}")
// The whole download is read into a byte array in memory before being written to disk.
Files.write(icuSrcTgz.toPath(), src.toURL().openStream().bytes)
logger.lifecycle("Downloaded ${icuSrcTgz.size()} bytes.")
def icuSrcDir = file("${icuBuildDir}/icu/source")
// Remove any previously extracted source tree before extracting again.
project.delete icuSrcDir
// Extract the tgz
project.quietExec {
executable "tar"
workingDir icuBuildDir
args = [
// Configure and compile, roughly:
// (cd icu/source && CFLAGS=-O0 CXXFLAGS=-O0 ./configure --prefix=$(pwd) --enable-rpath && make -j<tests.jvms, default 4>)
project.quietExec {
executable "sh"
workingDir icuSrcDir
// Compiling ICU is slow; build without optimization, since optimized binaries
// don't speed up our usage of the gennorm2 tool.
environment("CFLAGS", "-O0")
environment("CXXFLAGS", "-O0")
args = [
project.quietExec {
executable "make"
workingDir icuSrcDir
args = [
// Parallelism follows the tests.jvms property, falling back to 4 when it is not set.
"-j${propertyOrDefault('tests.jvms', '4')}"
// Test that the binaries work: derb -V
logger.lifecycle("Compiled ICU, checking...")
project.quietExec {
executable "./derb"
workingDir icuBinDir
args = [
// Regenerates UnicodeProps.java
// in the analysis-common module, using the icu4j release pinned in the icu_68 configuration.
configure(project(":lucene:analysis:common")) {
task generateUnicodePropsInternal() {
def icuConfig = rootProject.configurations.icu_68
def outputFile = file("src/java/org/apache/lucene/analysis/util/UnicodeProps.java")
description "Regenerate ${outputFile} (with ${icuConfig.name})"
group "generation"
dependsOn icuConfig
// The configuration name is tracked as an input, so switching to a different ICU
// configuration makes the task out of date.
inputs.property "icuConfig", icuConfig.name
outputs.file outputFile
doFirst {
// Launch groovy.lang.GroovyShell in a separate JVM with the selected icu4j and the
// root project's groovy configuration on the classpath.
project.javaexec {
main "groovy.lang.GroovyShell"
classpath icuConfig, rootProject.configurations.groovy
args = [
"--encoding", "UTF-8",
// NOTE(review): "andThenTasks" presumably runs spotlessApply after regeneration to
// reformat the generated Java file - confirm against wrapWithPersistentChecksums.
regenerate.dependsOn wrapWithPersistentChecksums(generateUnicodePropsInternal, [ andThenTasks: "spotlessApply" ])
// UnicodeEmojiProperties.jflex
// Regenerated in the core module, using the icu4j release pinned in the icu_62 configuration.
configure(project(":lucene:core")) {
task generateEmojiPropertiesInternal() {
def icuConfig = rootProject.configurations.icu_62
def outputFile = file("src/data/jflex/UnicodeEmojiProperties.jflex")
description "Regenerate ${outputFile} (with ${icuConfig.name})"
group "generation"
dependsOn icuConfig
// The configuration name is tracked as an input, so switching to a different ICU
// configuration makes the task out of date.
inputs.property "icuConfig", icuConfig.name
outputs.file outputFile
doFirst {
// Launch groovy.lang.GroovyShell in a separate JVM with the selected icu4j and the
// root project's groovy configuration on the classpath.
project.javaexec {
main "groovy.lang.GroovyShell"
classpath icuConfig, rootProject.configurations.groovy
args = [
"--encoding", "UTF-8",
// Hook the generator into the "regenerate" task through wrapWithPersistentChecksums.
regenerate.dependsOn wrapWithPersistentChecksums(generateEmojiPropertiesInternal)