mirror of https://github.com/apache/lucene.git
251 lines
7.9 KiB
Groovy
251 lines
7.9 KiB
Groovy
import java.nio.file.Files
|
|
|
|
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
// Declare a "jflex" configuration (and its dependency) on the root project; JFlexTask runs JFlex from this classpath.
|
|
|
|
configure(rootProject) {
  // Container for the JFlex artifact (created if it does not exist yet).
  configurations.maybeCreate("jflex")

  // The JFlex version is looked up in scriptDepVersions under the 'jflex' key.
  dependencies.add("jflex", "de.jflex:jflex:${scriptDepVersions['jflex']}")
}
|
|
|
|
// Location returned by scriptResources for this build script; the skeleton
// files below are resolved relative to it.
def resources = scriptResources(buildscript)

// Resolves a file name against the script resources location.
def skeletonFile = { String name -> file("${resources}/${name}") }

def skeletonDefault = skeletonFile("skeleton.default.txt")
def skeletonNoBufferExpansion = skeletonFile("skeleton.disable.buffer.expansion.txt")
|
|
|
|
configure(project(":lucene:core")) {
  // Regenerates StandardTokenizerImpl.java from its JFlex grammar, using the
  // skeleton referenced by skeletonNoBufferExpansion.
  task generateStandardTokenizerInternal(type: JFlexTask) {
    group = "generation"
    description = "Regenerate StandardTokenizerImpl.java"

    skeleton = skeletonNoBufferExpansion
    jflexFile = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex')

    doLast {
      // Post-process the generated scanner: rewrite the ZZ_BUFFERSIZE
      // declaration from "private static final int" to "private int".
      def generatedScanner = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java')
      def replacement = [
        file    : generatedScanner,
        encoding: "UTF-8",
        token   : "private static final int ZZ_BUFFERSIZE =",
        value   : "private int ZZ_BUFFERSIZE ="
      ]
      ant.replace(replacement)
    }
  }

  // Attach the checksum-wrapped generator to the "regenerate" task.
  def checksummedGenerator = wrapWithPersistentChecksums(generateStandardTokenizerInternal, [ andThenTasks: "spotlessApply" ])
  regenerate.dependsOn checksummedGenerator
}
|
|
|
|
configure(project(":lucene:analysis:common")) {
|
|
// Regenerates the top-level-domain JFlex macro file (ASCIITLD.jflex) and the
// TLD list used by tests (TLDs.txt) by running GenerateJflexTLDMacros against
// the IANA TLD list. The generator writes to temporary files first; the real
// output files are only overwritten when the non-comment content has changed.
task generateTldsInternal() {
  def tldZones = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
  def jflexMacro = file("src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex")
  def tldList = file("src/test/org/apache/lucene/analysis/email/TLDs.txt")

  description "Regenerate top-level domain jflex macros and tests"
  group "generation"

  dependsOn { sourceSets.tools.runtimeClasspath }

  inputs.property "tldZones", tldZones
  outputs.files jflexMacro, tldList

  doFirst {
    File tmpJflexMacro = null
    File tmpTldList = null

    try {
      tmpJflexMacro = File.createTempFile(jflexMacro.getName(), ".tmp", getTemporaryDir())
      tmpTldList = File.createTempFile(tldList.getName(), ".tmp", getTemporaryDir())

      project.javaexec {
        main = "org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
        classpath = sourceSets.tools.runtimeClasspath

        ignoreExitValue false
        args = [
          tldZones,
          tmpJflexMacro,
          tmpTldList
        ]
      }

      // LUCENE-9926: tldZones is regenerated daily. Compare the generated content (excluding comments) so that
      // we only update actual output files if non-comments have changed.
      // Returns the lines of the given file that are neither blank nor start
      // with "//"; a missing file yields an empty list.
      def contentLines = { File source ->
        if (!source.exists()) {
          return []
        }
        return source.readLines("UTF-8").findAll { line ->
          !(line.isBlank() || line.startsWith("//"))
        }
      }

      if (contentLines(tmpTldList) == contentLines(tldList)) {
        logger.lifecycle("Generated TLD content identical as before, not updating.")
      } else {
        tldList.setBytes tmpTldList.bytes
        jflexMacro.setBytes tmpJflexMacro.bytes
        logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
      }
    } finally {
      // Best-effort cleanup so temporary files do not pile up in the task's
      // temporary directory across runs (also when the generator fails).
      [tmpJflexMacro, tmpTldList].each { tmp -> tmp?.delete() }
    }
  }
}
|
|
|
|
// Regenerates WikipediaTokenizerImpl.java from its JFlex grammar with the default skeleton.
task generateWikipediaTokenizerInternal(type: JFlexTask) {
  group = "generation"
  description = "Regenerate WikipediaTokenizerImpl.java"

  skeleton = skeletonDefault
  jflexFile = file('src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex')
}
|
|
|
|
// Regenerates ClassicTokenizerImpl.java from its JFlex grammar with the default skeleton.
task generateClassicTokenizerInternal(type: JFlexTask) {
  group = "generation"
  description = "Regenerate ClassicTokenizerImpl.java"

  skeleton = skeletonDefault
  jflexFile = file('src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex')
}
|
|
|
|
// Regenerates UAX29URLEmailTokenizerImpl.java from its JFlex grammar. The JFlex
// run is given a 12g heap and uses the skeleton referenced by
// skeletonNoBufferExpansion.
task generateUAX29URLEmailTokenizerInternal(type: JFlexTask) {
  group = "generation"
  description = "Regenerate UAX29URLEmailTokenizerImpl.java"

  heapSize = "12g"
  skeleton = skeletonNoBufferExpansion
  jflexFile = file('src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex')

  // Additional input (included by the source jflex file)
  inputs.file file('src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex')

  // Ordering constraint only, not a strict dependency (although regenerating
  // both would make sense): if TLD regeneration runs too, it runs first.
  mustRunAfter generateTldsInternal

  doFirst {
    logger.lifecycle("Regenerating UAX29URLEmailTokenizerImpl. This may take a long time (and requires ${heapSize} of memory!).")
  }

  doLast {
    // Post-process the generated scanner: rewrite the ZZ_BUFFERSIZE
    // declaration from "private static final int" to "private int".
    def generatedScanner = file('src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java')
    def replacement = [
      file    : generatedScanner,
      encoding: "UTF-8",
      token   : "private static final int ZZ_BUFFERSIZE =",
      value   : "private int ZZ_BUFFERSIZE ="
    ]
    ant.replace(replacement)
  }
}
|
|
|
|
// Regenerates HTMLStripCharFilter.java from its JFlex grammar with the default
// skeleton. Before JFlex runs, HTMLCharacterEntities.jflex is regenerated by
// running htmlentity.py from that file's directory.
task generateHTMLStripCharFilterInternal(type: JFlexTask) {
  group = "generation"
  description = "Regenerate HTMLStripCharFilter.java"

  skeleton = skeletonDefault
  jflexFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex')

  doFirst {
    // Regenerate HTMLCharacterEntities.jflex first.
    def entitiesJflex = file('src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex')
    def scriptDir = entitiesJflex.parentFile

    quietExec {
      executable = project.externalTool("python3")
      workingDir = scriptDir
      // -B: don't write any bytecode cache
      args += ["-B", "htmlentity.py"]
    }

    // Normalize the regenerated file to LF line endings.
    project.ant.fixcrlf(file: entitiesJflex, encoding: "UTF-8", eol: "lf")
  }
}
|
|
|
|
// Attach every generator task of this project to "regenerate", each one
// wrapped by wrapWithPersistentChecksums with "spotlessApply" as follow-up task.
regenerate.dependsOn([
  generateWikipediaTokenizerInternal,
  generateClassicTokenizerInternal,
  generateUAX29URLEmailTokenizerInternal,
  generateHTMLStripCharFilterInternal,
  generateTldsInternal
].collect { generator ->
  wrapWithPersistentChecksums(generator, [ andThenTasks: "spotlessApply" ])
})
|
|
}
|
|
|
|
/**
 * Runs JFlex (jflex.Main, taken from the root project's "jflex" configuration)
 * on a single grammar file and writes the generated Java source next to it.
 */
class JFlexTask extends DefaultTask {
  /** The JFlex grammar to compile. */
  @InputFile
  File jflexFile

  /** Skeleton file passed to JFlex via --skel. */
  @InputFile
  File skeleton

  /**
   * Maximum heap size for the forked JFlex JVM (for example "12g"); when unset
   * the forked JVM's default is used.
   */
  // NOTE(review): only @Optional is present here, so this value does not appear
  // to be registered as a task input; confirm whether @Input or @Internal was intended.
  @Optional
  String heapSize

  /**
   * The generated Java file: same path as the grammar, with the trailing
   * ".jflex" extension swapped for ".java". Only the extension at the very end
   * of the path is replaced, so a ".jflex" fragment elsewhere in the absolute
   * path (for instance in a parent directory name) is left untouched.
   */
  @OutputFile
  File getGeneratedFile() {
    return project.file(jflexFile.absolutePath.replaceFirst('\\.jflex$', '.java'))
  }

  JFlexTask() {
    // The JFlex classpath must be available before the task can run.
    dependsOn(project.rootProject.configurations.jflex)
  }

  /**
   * Compiles the grammar with JFlex and normalizes the generated file to LF
   * line endings.
   *
   * @throws GradleException if the grammar file is unset or does not exist
   */
  @TaskAction
  def generate() {
    if (!jflexFile || !jflexFile.exists()) {
      throw new GradleException("JFlex file does not exist: ${jflexFile}")
    }

    // Single source of truth for the output location (same as the declared output).
    def target = getGeneratedFile()

    logger.lifecycle("Recompiling JFlex: ${project.rootDir.relativePath(jflexFile)}")

    project.javaexec {
      classpath = project.rootProject.configurations.jflex

      main = "jflex.Main"
      args += [
        "-nobak",
        "--quiet",
        "--encoding", "UTF-8",
      ]

      if (heapSize) {
        maxHeapSize = heapSize
      }

      if (skeleton) {
        args += ["--skel", skeleton.absolutePath]
      }

      // Output directory first, the grammar file last.
      args += [
        "-d", target.parentFile.absolutePath,
        jflexFile
      ]
    }

    // Correct line endings for Windows.
    project.ant.fixcrlf(
      file: target,
      encoding: "UTF-8",
      eol: "lf"
    )
  }
}
|