2020-01-27 12:05:34 -05:00
|
|
|
/*
|
|
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
|
|
* this work for additional information regarding copyright ownership.
|
|
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
* (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-01-26 13:44:18 -05:00
|
|
|
// Declare the 'jflex' configuration on the root project; JFlexTask resolves the JFlex classpath from it.
|
|
|
|
|
|
|
|
configure(rootProject) {
  // Configuration that JFlexTask (below) puts on the classpath of the JFlex process.
  configurations {
    jflex
  }

  // The JFlex artifact itself; its version is looked up in scriptDepVersions.
  dependencies.add("jflex", "de.jflex:jflex:${scriptDepVersions['jflex']}")
}
|
|
|
|
|
2021-04-02 03:56:47 -04:00
|
|
|
// Location returned by scriptResources(buildscript); both skeleton files are resolved against it.
def resources = scriptResources(buildscript)

// Skeleton used by the Wikipedia, Classic and HTMLStripCharFilter generation tasks.
def skeletonDefault = file("${resources}/skeleton.default.txt")

// Skeleton used by the StandardTokenizer and UAX29URLEmailTokenizer generation tasks.
def skeletonNoBufferExpansion = file("${resources}/skeleton.disable.buffer.expansion.txt")
|
2020-01-26 13:44:18 -05:00
|
|
|
|
|
|
|
configure(project(":lucene:core")) {
  task generateStandardTokenizer(type: JFlexTask) {
    group = "generation"
    description = "Regenerate StandardTokenizerImpl.java"

    jflexFile = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex')
    skeleton = skeletonNoBufferExpansion

    // Post-process the generated scanner: turn the ZZ_BUFFERSIZE constant
    // into a plain (non-static, non-final) instance field.
    doLast {
      def generatedScanner = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java')
      ant.replace(
          file: generatedScanner,
          encoding: "UTF-8",
          token: "private static final int ZZ_BUFFERSIZE =",
          value: "private int ZZ_BUFFERSIZE ="
      )
    }
  }

  // Hook the (checksum-wrapped) generation task into the aggregate 'regenerate' task.
  regenerate.dependsOn(
      wrapWithPersistentChecksums(generateStandardTokenizer, [ andThenTasks: "spotlessApply" ])
  )
}
|
2020-01-27 06:36:13 -05:00
|
|
|
|
|
|
|
configure(project(":lucene:analysis:common")) {
  // Runs GenerateJflexTLDMacros with the root zone URL and the two files it is expected to write.
  task generateTlds() {
    def rootZoneUrl = "https://www.internic.net/zones/root.zone"
    def tldMacroFile = file("src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex")
    def tldListFile = file("src/test/org/apache/lucene/analysis/email/TLDs.txt")

    group = "generation"
    description = "Regenerate top-level domain jflex macros and tests"

    dependsOn { sourceSets.tools.runtimeClasspath }

    outputs.files(tldMacroFile, tldListFile)

    doFirst {
      project.javaexec {
        classpath = sourceSets.tools.runtimeClasspath
        main = "org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"

        ignoreExitValue = false
        args = [rootZoneUrl, tldMacroFile, tldListFile]
      }

      logger.lifecycle("You've regenerated the TLD include file, remember to regenerate UAX29URLEmailTokenizerImpl too.")
    }
  }

  task generateWikipediaTokenizer(type: JFlexTask) {
    group = "generation"
    description = "Regenerate WikipediaTokenizerImpl.java"

    jflexFile = file('src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex')
    skeleton = skeletonDefault
  }

  task generateClassicTokenizer(type: JFlexTask) {
    group = "generation"
    description = "Regenerate ClassicTokenizerImpl.java"

    jflexFile = file('src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex')
    skeleton = skeletonDefault
  }

  task generateUAX29URLEmailTokenizer(type: JFlexTask) {
    group = "generation"
    description = "Regenerate UAX29URLEmailTokenizerImpl.java"

    jflexFile = file('src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex')
    skeleton = skeletonNoBufferExpansion
    heapSize = "12g"

    // Ordering only, not a hard dependency (although regenerating both would make sense):
    // if TLD regeneration is scheduled too, this task runs after it.
    mustRunAfter(generateTlds)

    // The grammar includes this file, so track it as an additional input.
    inputs.file(file('src/java/org/apache/lucene/analysis/email/ASCIITLD.jflex'))

    doFirst {
      logger.lifecycle("Regenerating UAX29URLEmailTokenizerImpl. This may take a long time (and requires ${heapSize} of memory!).")
    }

    // Post-process the generated scanner: turn the ZZ_BUFFERSIZE constant
    // into a plain (non-static, non-final) instance field.
    doLast {
      def generatedScanner = file('src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java')
      ant.replace(
          file: generatedScanner,
          encoding: "UTF-8",
          token: "private static final int ZZ_BUFFERSIZE =",
          value: "private int ZZ_BUFFERSIZE ="
      )
    }
  }

  task generateHTMLStripCharFilter(type: JFlexTask) {
    group = "generation"
    description = "Regenerate HTMLStripCharFilter.java"

    jflexFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex')
    skeleton = skeletonDefault

    doFirst {
      // Regenerate HTMLCharacterEntities.jflex first: run htmlentity.py from that file's directory.
      def entitiesFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex')

      quietExec {
        executable = project.externalTool("python3")
        workingDir = entitiesFile.parentFile
        args += [
            "-B", // don't write any bytecode cache
            "htmlentity.py"
        ]
      }

      // Normalize the line endings of the entities file to LF.
      project.ant.fixcrlf(
          file: entitiesFile,
          encoding: "UTF-8",
          eol: "lf"
      )
    }
  }

  // Hook every (checksum-wrapped) generation task of this project into 'regenerate'.
  regenerate.dependsOn(
      wrapWithPersistentChecksums(generateWikipediaTokenizer, [ andThenTasks: "spotlessApply" ]),
      wrapWithPersistentChecksums(generateClassicTokenizer, [ andThenTasks: "spotlessApply" ]),
      wrapWithPersistentChecksums(generateUAX29URLEmailTokenizer, [ andThenTasks: "spotlessApply" ]),
      wrapWithPersistentChecksums(generateHTMLStripCharFilter, [ andThenTasks: "spotlessApply" ]),
      wrapWithPersistentChecksums(generateTlds, [ andThenTasks: "spotlessApply" ])
  )
}
|
|
|
|
|
|
|
|
/**
 * Runs JFlex (main class jflex.Main, taken from the root project's 'jflex' configuration)
 * on a single grammar file. The generated Java source is written into the grammar's own
 * directory and its line endings are normalized to LF.
 */
class JFlexTask extends DefaultTask {
  /** The .jflex grammar file to compile. */
  @InputFile
  File jflexFile

  /** Skeleton file handed to JFlex through the --skel option. */
  @InputFile
  File skeleton

  /**
   * Maximum heap size for the forked JFlex JVM (for example "12g"); may be left unset.
   * It is only applied as the JVM's maxHeapSize, so it is declared internal rather
   * than as a task input.
   */
  @Internal
  String heapSize

  /**
   * The Java source file JFlex is expected to produce: the grammar's path with its
   * trailing ".jflex" extension replaced by ".java".
   */
  @OutputFile
  File getGeneratedFile() {
    // Anchor the replacement to the end of the path so that a ".jflex" fragment in
    // a parent directory name is never rewritten.
    return project.file(jflexFile.absolutePath.replaceFirst(/\.jflex$/, ".java"))
  }

  JFlexTask() {
    // Make sure the JFlex configuration is available before the task runs.
    dependsOn(project.rootProject.configurations.jflex)
  }

  /**
   * Invokes JFlex on {@code jflexFile} and then fixes the generated file's line endings.
   *
   * @throws GradleException if {@code jflexFile} is unset or does not exist.
   */
  @TaskAction
  def generate() {
    if (!jflexFile || !jflexFile.exists()) {
      throw new GradleException("JFlex file does not exist: ${jflexFile}")
    }

    // Single source of truth for the output location (same value Gradle tracks as the task output).
    def target = getGeneratedFile()

    logger.lifecycle("Recompiling JFlex: ${project.rootDir.relativePath(jflexFile)}")

    project.javaexec {
      classpath {
        project.rootProject.configurations.jflex
      }

      main = "jflex.Main"
      args += [
          "-nobak",
          "--quiet",
          "--encoding", "UTF-8",
      ]

      if (heapSize) {
        maxHeapSize = heapSize
      }

      if (skeleton) {
        args += ["--skel", skeleton.absolutePath]
      }

      // Output directory first, then the grammar file as the last argument.
      args += [
          "-d", target.parentFile.absolutePath,
          jflexFile
      ]
    }

    // Correct line endings for Windows.
    project.ant.fixcrlf(
        file: target,
        encoding: "UTF-8",
        eol: "lf"
    )
  }
}
|