LUCENE-9933: Add non-file properties to wrapped regenerate checksums (#95)

This commit is contained in:
Dawid Weiss 2021-04-19 13:37:47 +02:00 committed by GitHub
parent 936b3451af
commit bd8f182b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 29939 additions and 30015 deletions

View File

@ -67,6 +67,11 @@ configure(project(":lucene:analysis:icu")) {
icupkg = file("${icuBinDir}/icupkg")
}
// Resolve version lazily (can't resolve at configuration time).
def icu4jVersionProvider = project.provider { getVersion('com.ibm.icu', 'icu4j') }
// Lazy GString: the ICU version is resolved only when the string is evaluated, not at configuration time.
def icu4jVersion = "${-> icu4jVersionProvider.get()}"
def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
task genUtr30DataFilesInternal() {
@ -80,11 +85,11 @@ configure(project(":lucene:analysis:icu")) {
def outputFile = file("src/resources/org/apache/lucene/analysis/icu/utr30.nrm")
inputs.files inputFiles
inputs.property "icu4j", icu4jVersionProvider
outputs.file outputFile
doFirst {
// all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
def v = getVersion('com.ibm.icu', 'icu4j');
project.javaexec {
main = "org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
classpath = sourceSets.tools.runtimeClasspath
@ -92,7 +97,7 @@ configure(project(":lucene:analysis:icu")) {
ignoreExitValue false
workingDir utr30DataDir
args = [
"release-${v.replace(".", "-")}"
"release-${icu4jVersion.replace(".", "-")}"
]
}
@ -127,6 +132,7 @@ configure(project(":lucene:analysis:icu")) {
def targetDir = file("src/resources/org/apache/lucene/analysis/icu/segmentation")
inputs.files fileTree(dir: sourceDir, include: "*.rbbi")
inputs.property "icu4j", icu4jVersionProvider
outputs.files fileTree(dir: targetDir, include: "*.brk")
doFirst {
@ -146,8 +152,7 @@ configure(project(":lucene:analysis:icu")) {
task compileIcuWindows() {
doFirst {
def v = getVersion('com.ibm.icu', 'icu4j');
def v = icu4jVersion
def icuBinZip = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}.zip")
if (!icuBinZip.exists()) {
icuBuildDir.mkdirs()
@ -175,7 +180,7 @@ configure(project(":lucene:analysis:icu")) {
throw new GradleException("ICU compilation not supported on Windows.")
}
def v = getVersion('com.ibm.icu', 'icu4j');
def v = icu4jVersion
def icuSrcTgz = file("${icuBuildDir}/icu4c-${v.replace(".", "_")}-src.tgz")
// Download sources for version matching icu4j version in version.props
@ -244,6 +249,8 @@ configure(project(":lucene:analysis:common")) {
group "generation"
dependsOn icuConfig
inputs.property "icuConfig", icuConfig.name
outputs.file outputFile
doFirst {
@ -274,6 +281,8 @@ configure(project(":lucene:core")) {
group "generation"
dependsOn icuConfig
inputs.property "icuConfig", icuConfig.name
outputs.file outputFile
doFirst {

View File

@ -1,3 +1,5 @@
import java.nio.file.Files
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -63,9 +65,13 @@ configure(project(":lucene:analysis:common")) {
dependsOn { sourceSets.tools.runtimeClasspath }
inputs.property "tldZones", tldZones
outputs.files jflexMacro, tldList
doFirst {
File tmpJflexMacro = File.createTempFile(jflexMacro.getName(), ".tmp", getTemporaryDir())
File tmpTldList = File.createTempFile(tldList.getName(), ".tmp", getTemporaryDir())
project.javaexec {
main = "org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
classpath = sourceSets.tools.runtimeClasspath
@ -73,12 +79,30 @@ configure(project(":lucene:analysis:common")) {
ignoreExitValue false
args = [
tldZones,
jflexMacro,
tldList
tmpJflexMacro,
tmpTldList
]
}
logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
// LUCENE-9926: tldZones is regenerated daily. Compare the generated content (excluding comments) so that
// we only update actual output files if non-comments have changed.
// Returns the significant lines of a file: everything except blank lines and
// lines starting with "//". A file that does not exist yields an empty list.
def contentLines = { File file ->
  if (!file.exists()) {
    return []
  }
  return file.readLines("UTF-8").findAll { String text ->
    !(text.isBlank() || text.startsWith("//"))
  }
}
if (contentLines(tmpTldList).equals(contentLines(tldList))) {
logger.lifecycle("Generated TLD content identical as before, not updating.")
} else {
tldList.setBytes tmpTldList.bytes
jflexMacro.setBytes tmpJflexMacro.bytes
logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
}
}
}

View File

@ -21,6 +21,47 @@ import org.apache.commons.codec.digest.DigestUtils
// Create common 'regenerate' task sub-tasks can hook into.
/**
 * Compute all "checksummed" key-value pairs for a task.
 *
 * The result contains one entry per input property of the task, keyed as
 * "property:" followed by the property name and mapped to its string value,
 * plus one entry per input/output file, keyed by the file's path relative to
 * the root project directory and mapped to the SHA-1 hex digest of the file
 * ("--" if the file does not exist).
 *
 * @param sourceTask the task whose input properties and input/output files are inspected.
 * @return a sorted map of the entries described above.
 * @throws GradleException if an input property does not resolve to a string.
 */
def computeChecksummedEntries = { Task sourceTask ->
  // A flat, ordered map of key-value pairs.
  Map<String, String> allEntries = new TreeMap<>()

  // Make sure all input properties are either simple strings
  // or closures/providers returning simple strings.
  //
  // Don't overcomplicate things with other serializable types.
  Map<String, Object> props = sourceTask.inputs.properties
  props.forEach { key, val ->
    // Handle closures and other lazy providers.
    if (val instanceof Provider<?>) {
      val = val.get()
    }
    if (val instanceof Closure<?>) {
      val = val.call()
    }

    // Accept any character sequence, not just java.lang.String: an interpolated
    // Groovy string (GString) is a CharSequence but not a String, and rejecting it
    // would make "${...}" property values fail even though they are plain strings.
    if (!(val instanceof CharSequence)) {
      throw new GradleException("Input properties of wrapped tasks must all be " +
          "strings: ${key} in ${sourceTask.name} is not.")
    }

    // The "property:" prefix keeps property entries apart from file path entries.
    allEntries.put("property:" + key, val.toString())
  }

  // Collect all of the task's input and output files and compute their checksums.
  FileCollection allFiles = sourceTask.inputs.files + sourceTask.outputs.files

  // Compute checksums keyed by root-project relative paths.
  allFiles.files.forEach { file ->
    allEntries.put(
        sourceTask.project.rootDir.relativePath(file),
        // "--" is the placeholder for a file that does not exist (yet).
        file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--")
  }

  return allEntries
}
configure([
project(":lucene:analysis:common"),
project(":lucene:core"),
@ -86,28 +127,18 @@ configure([
}
doFirst {
// Collect all of task inputs/ outputs.
FileCollection allFiles = sourceTask.inputs.files + sourceTask.outputs.files
ext.allFiles = allFiles
// Compute checksums for root-project relative paths
Map<String, String> actualChecksums = allFiles.files.collectEntries { file ->
[
sourceTask.project.rootDir.relativePath(file),
file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--"
]
}
ext.actualChecksums = actualChecksums
// Current persisted task input/outputs (file checksums, properties)
ext.currentChecksums = computeChecksummedEntries(sourceTask)
// Load any previously written checksums
ext.savedChecksums = new TreeMap<>()
ext.checksumsFile = project.file("src/generated/checksums/${sourceTaskName}.json")
Map<String, String> savedChecksums = [:]
if (checksumsFile.exists()) {
savedChecksums = new JsonSlurper().parse(checksumsFile) as Map
savedChecksums.putAll(new JsonSlurper().parse(checksumsFile) as Map)
}
ext.savedChecksums = savedChecksums
ext.checksumMatch = (savedChecksums.equals(actualChecksums))
// Compare saved and current checksums for subsequent tasks.
ext.checksumMatch = (savedChecksums.equals(currentChecksums))
}
})
@ -117,16 +148,16 @@ configure([
doFirst {
if (!checksumLoadTask.checksumMatch) {
// This can be made prettier but leave it verbose for now:
Map<String, String> actual = checksumLoadTask.actualChecksums
Map<String, String> current = checksumLoadTask.currentChecksums
Map<String, String> expected = checksumLoadTask.savedChecksums
def same = actual.intersect(expected)
actual = actual - same
def same = current.intersect(expected)
current = current - same
expected = expected - same
throw new GradleException("Checksums mismatch for derived resources; you might have" +
" modified a generated resource (regenerate task: ${sourceTask.name}):\n" +
"Actual:\n ${actual.entrySet().join('\n ')}\n\n" +
"Current:\n ${current.entrySet().join('\n ')}\n\n" +
"Expected:\n ${expected.entrySet().join('\n ')}"
)
}
@ -141,16 +172,10 @@ configure([
File checksumsFile = checksumLoadTask.ext.checksumsFile
checksumsFile.parentFile.mkdirs()
// Recompute checksums for root-project relative paths
Map<String, String> actualChecksums = checksumLoadTask.ext.allFiles.files.collectEntries { file ->
[
sourceTask.project.rootDir.relativePath(file),
new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim()
]
}
// Recompute checksums after the task has completed and write them.
def updatedChecksums = computeChecksummedEntries(sourceTask)
checksumsFile.setText(
JsonOutput.prettyPrint(JsonOutput.toJson(new TreeMap<String, String>(actualChecksums))), "UTF-8")
JsonOutput.prettyPrint(JsonOutput.toJson(new TreeMap<String, String>(updatedChecksums))), "UTF-8")
logger.warn("Updated generated file checksums for task ${sourceTask.path}.")
}

View File

@ -146,3 +146,7 @@ Finally, if you do feel like force-regenerating everything, remember to exclude
monster...
gradlew regenerate -x generateUAX29URLEmailTokenizerInternal --rerun-tasks
and on Windows, exclude snowball regeneration (requires bash):
gradlew regenerate -x generateUAX29URLEmailTokenizerInternal -x snowball --rerun-tasks

View File

@ -1,4 +1,5 @@
{
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "521338e15fbd3fbdd2c1f8fd9c9fc365d4bcce9d",
"lucene/analysis/common/src/test/org/apache/lucene/analysis/email/TLDs.txt": "1c5a201efff431be1c62150aa6bd3dac0f3a21e2"
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "aae1ea12f09aa2efcf7611df2dd11cda32869cda",
"lucene/analysis/common/src/test/org/apache/lucene/analysis/email/TLDs.txt": "54d9a32e6dbac42aee8b3aa0d1133ed2fb5f5259",
"property:tldZones": "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
}

View File

@ -1,6 +1,6 @@
{
"gradle/generation/jflex/skeleton.disable.buffer.expansion.txt": "68263ff0a014904c6e89b040d868d8f399408908",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "521338e15fbd3fbdd2c1f8fd9c9fc365d4bcce9d",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java": "e437900d9570ca007f9c02c9ea286222b644c329",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex": "aae1ea12f09aa2efcf7611df2dd11cda32869cda",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java": "d890462c065c2b66ce0e58d95fae64ecb64049d2",
"lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex": "56a751d27e481fb55388f91ebf34f5a0cb8cb1b2"
}

View File

@ -1,3 +1,4 @@
{
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4"
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "7d2cf5f959c2dfc5b83295e359212a1228f761c4",
"property:icuConfig": "icu_68"
}

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
// Generated from IANA TLD Database <https://data.iana.org/TLD/tlds-alpha-by-domain.txt>
// file version from 2021 Apr 12, Mon 07:07:01 Coordinated Universal Time
// file version from 2021 Apr 18, Sun 07:07:01 Coordinated Universal Time
// generated by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
// LUCENE-8278: None of the TLDs in {ASCIITLD} is a 1-character-shorter prefix of another TLD
@ -777,7 +777,6 @@ ASCIITLD = "." (
| [nN][aA][bB]
| [nN][aA][gG][oO][yY][aA]
| [nN][aA][mM][eE]
| [nN][aA][tT][iI][oO][nN][wW][iI][dD][eE]
| [nN][aA][tT][uU][rR][aA]
| [nN][aA][vV][yY]
| [nN][bB][aA]
@ -831,7 +830,6 @@ ASCIITLD = "." (
| [oO][nN][gG]
| [oO][nN][lL]
| [oO][nN][lL][iI][nN][eE]
| [oO][nN][yY][oO][uU][rR][sS][iI][dD][eE]
| [oO][oO][oO]
| [oO][pP][eE][nN]
| [oO][rR][aA][cC][lL][eE]

View File

@ -831,7 +831,6 @@ na
nab
nagoya
name
nationwide
natura
navy
nba
@ -891,7 +890,6 @@ one
ong
onl
online
onyourside
ooo
open
oracle

View File

@ -127,7 +127,6 @@ public class GenerateJflexTLDMacros {
getIANATLDDatabase();
partitionTLDprefixesBySuffixLength();
writeOutput();
System.out.println("Wrote TLD macros to '" + jflexMacroFile + "':");
int totalDomains = 0;
for (int suffixLength = 0; suffixLength < TLDsBySuffixLength.size(); ++suffixLength) {
int domainsAtThisSuffixLength = TLDsBySuffixLength.get(suffixLength).size();

View File

@ -2,5 +2,6 @@
"lucene/analysis/icu/src/data/uax29/Default.rbbi": "71bfaee5e81ac272aff828d1e44d0612be1b8363",
"lucene/analysis/icu/src/data/uax29/MyanmarSyllable.rbbi": "4c6817658b454add5ec1f9ac8c0015ce8eb3b5f2",
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk": "1b9013b7ef4ba32a851a330c58a8fa820b9dda79",
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk": "cc023ec17e0148518086098691785a32b88ee09a"
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk": "cc023ec17e0148518086098691785a32b88ee09a",
"property:icu4j": "68.2"
}

View File

@ -7,5 +7,6 @@
"lucene/analysis/icu/src/data/utr30/nfc.txt": "ec95d7f7e5910791717234dd09efc4b13dc32d35",
"lucene/analysis/icu/src/data/utr30/nfkc.txt": "b8e91bc64e354af505d51f6072a43c90c6b4d1b5",
"lucene/analysis/icu/src/data/utr30/nfkc_cf.txt": "22d90ea4a7771e6ddebaaeb9438e98ce625e16f5",
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm": "32a6a3198039883c93f9ebef31fe24c1029f2b07"
"lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm": "32a6a3198039883c93f9ebef31fe24c1029f2b07",
"property:icu4j": "68.2"
}

View File

@ -1,3 +1,4 @@
{
"lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex": "7491dd535debc6e9e9ce367c4d3a7217e466dcae"
"lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex": "7491dd535debc6e9e9ce367c4d3a7217e466dcae",
"property:icuConfig": "icu_62"
}

View File

@ -2,5 +2,6 @@
"lucene/core/src/java/org/apache/lucene/util/automaton/Lev1ParametricDescription.java": "8a07d087eba9db1bc228b9dbc4e3b9294dac8478",
"lucene/core/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java": "a328606a8933fe2f989bf3dbed84aa34fb4113ed",
"lucene/core/src/java/org/apache/lucene/util/automaton/Lev2ParametricDescription.java": "0d839846eb3cbe0ef62576ab33d63a97c28a8b45",
"lucene/core/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java": "7c29a828a20f084c4998179fd6a4ee9aa909c1ce"
"lucene/core/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java": "7c29a828a20f084c4998179fd6a4ee9aa909c1ce",
"property:source": "https://github.com/jpbarrette/moman/archive/497c90e34e412b6494db6dabf0d95db8034bd325.zip"
}

View File

@ -26,5 +26,6 @@
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked8.java": "bc5124047b26fc0be147db5bc855be038d306f65",
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked9.java": "1121f69ea6d830ab6f4bd2f51d017b792c17d1b1",
"lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPackedSingleBlock.java": "36984601502fcc812eb9d9a845fa10774e575653",
"lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java": "18c97614f29045519a8d440a35c685c50a5e9a34"
"lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java": "18c97614f29045519a8d440a35c685c50a5e9a34",
"property:source": "https://github.com/jpbarrette/moman/archive/497c90e34e412b6494db6dabf0d95db8034bd325.zip"
}