// lucene/gradle/generation/jflex.gradle
// Dawid Weiss 6bde0f3ec8 LUCENE-9134: UAX29URLEmailTokenizerImpl regeneration. This requires TONS
// of memory and time... insane compared to the size of the input. None of my
// machines pass it without at least 12 gigs of heap (!).
// 2020-01-27 12:36:13 +01:00
//
// 122 lines
// 3.1 KiB
// Groovy

// Root-level aggregate task: the individual regenerate tasks are attached to it as dependencies.
configure(rootProject) {
  // Dedicated configuration holding the JFlex tool classpath.
  configurations {
    jflex
  }

  dependencies {
    jflex("de.jflex:jflex:${scriptDepVersions['jflex']}")
  }

  task jflex() {
    group = "generation"
    description = "Regenerate sources for corresponding jflex grammar files."

    dependsOn(
        ":lucene:core:jflexStandardTokenizerImpl",
        ":lucene:analysis:common:jflexUAX29URLEmailTokenizerImpl"
    )
  }
}
/**
 * Runs JFlex (jflex.Main) in a forked JVM on a single grammar file, writing the
 * generated source into the grammar's own directory, then normalizes the line
 * endings of the generated file to LF.
 *
 * We always regenerate, no need to declare outputs.
 */
class JFlexTask extends DefaultTask {
  /** The JFlex grammar to process. */
  @InputFile
  File jflexFile

  /** Skeleton file passed to JFlex via --skel; when unset, the option is omitted. */
  @InputFile
  @Optional
  File skeleton

  /** Maximum heap size of the forked JVM (for example "12g"); when unset, it is not configured. */
  @Input
  @Optional
  String heapSize

  JFlexTask() {
    // Depend on the root project's jflex configuration that supplies the tool classpath.
    dependsOn(project.rootProject.configurations.jflex)
  }

  /**
   * Invokes JFlex on {@link #jflexFile} and fixes the line endings of the result.
   *
   * @throws RuntimeException if the grammar file is unset or does not exist.
   */
  @TaskAction
  def generate() {
    if (!jflexFile || !jflexFile.exists()) {
      throw new RuntimeException("JFlex file does not exist: ${jflexFile}")
    }

    def targetDir = jflexFile.parentFile
    // Swap only the trailing extension; a plain replace() would also rewrite any
    // ".jflex" that happens to occur earlier in the absolute path.
    def target = jflexFile.absolutePath.replaceFirst(/\.jflex$/, ".java")

    logger.lifecycle("Regenerating JFlex:\n from: ${jflexFile}\n to: ${target}")

    project.javaexec {
      classpath {
        project.rootProject.configurations.jflex
      }
      main = "jflex.Main"

      args += [
          "-nobak",
          "--quiet",
          "--encoding", "UTF-8",
      ]

      if (heapSize) {
        maxHeapSize = heapSize
      }

      if (skeleton) {
        args += ["--skel", skeleton.absolutePath]
      }

      args += [
          "-d", targetDir.absolutePath,
          jflexFile.absolutePath
      ]
    }

    // Correct line endings for Windows.
    project.ant.fixcrlf(
        file: target,
        encoding: "UTF-8",
        eol: "lf"
    )
  }
}
configure(project(":lucene:core")) {
  task jflexStandardTokenizerImpl(type: JFlexTask) {
    group = "generation"
    description = "Regenerate StandardTokenizerImpl.java"

    skeleton = file("src/data/jflex/skeleton.disable.buffer.expansion.txt")
    jflexFile = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex')

    // Post-process the generated source: rewrite the ZZ_BUFFERSIZE declaration
    // from a static final constant into a plain instance field.
    doLast {
      def generatedSource = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java')
      ant.replace(
          file: generatedSource,
          encoding: "UTF-8",
          token: "private static final int ZZ_BUFFERSIZE =",
          value: "private int ZZ_BUFFERSIZE ="
      )
    }
  }
}
configure(project(":lucene:analysis:common")) {
  task jflexUAX29URLEmailTokenizerImpl(type: JFlexTask) {
    group = "generation"
    description = "Regenerate UAX29URLEmailTokenizerImpl.java"

    // The skeleton file lives in the :lucene:core project.
    skeleton = project(":lucene:core").file("src/data/jflex/skeleton.disable.buffer.expansion.txt")
    jflexFile = file('src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex')
    heapSize = "12g"

    // Tell the user up front that this regeneration is slow and memory-hungry.
    doFirst {
      logger.lifecycle("Regenerating UAX29URLEmailTokenizerImpl. This may take a long time (and requires tons of memory).")
    }

    // Post-process the generated source: rewrite the ZZ_BUFFERSIZE declaration
    // from a static final constant into a plain instance field.
    doLast {
      def generatedSource = file('src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java')
      ant.replace(
          file: generatedSource,
          encoding: "UTF-8",
          token: "private static final int ZZ_BUFFERSIZE =",
          value: "private int ZZ_BUFFERSIZE ="
      )
    }
  }
}