LUCENE-9134: add python-based regeneration of HTMLCharacterEntities.jflex inside jflexHTMLStripCharFilter.

This commit is contained in:
Dawid Weiss 2020-01-30 13:45:15 +01:00
parent 043dd207b6
commit 3a8ed5e8ed
1 changed file with 31 additions and 0 deletions

View File

@@ -114,6 +114,7 @@ configure(project(":lucene:core")) {
}
configure(project(":lucene:analysis:common")) {
task jflexUAX29URLEmailTokenizerImpl(type: JFlexTask) {
description "Regenerate UAX29URLEmailTokenizerImpl.java"
group "generation"
@@ -135,4 +136,34 @@ configure(project(":lucene:analysis:common")) {
)
}
}
// Regenerates HTMLStripCharFilter.java from its JFlex grammar. Before the
// JFlex run itself (doFirst), HTMLCharacterEntities.jflex is rebuilt from the
// standard output of htmlentity.py, executed with the "python" found on PATH.
task jflexHTMLStripCharFilter(type: JFlexTask) {
  description "Regenerate HTMLStripCharFilter.java"
  group "generation"

  jflexFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex')
  skeleton = project(":lucene:core").file("src/data/jflex/skeleton.default")

  doFirst {
    // Regenerate HTMLCharacterEntities.jflex first.
    def target = file('src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex')

    // Capture the script's stdout in memory instead of streaming it straight
    // into the target: opening the target up front would truncate the
    // checked-in file even when python is missing or the script fails.
    def generated = new ByteArrayOutputStream()
    project.exec {
      executable = "python"
      // htmlentity.py is referenced by a relative name, so run from the
      // directory that holds the target file.
      workingDir = target.parentFile
      standardOutput = generated
      args += [
        "-B", // don't write any bytecode cache
        "htmlentity.py"
      ]
    }

    // Reached only if exec did not throw (a non-zero exit fails the build by
    // default), so the existing file is replaced only by a complete output.
    target.bytes = generated.toByteArray()

    // Normalize line endings to LF so the result does not depend on the
    // platform the script ran on.
    project.ant.fixcrlf(
      file: target,
      encoding: "UTF-8",
      eol: "lf"
    )
  }
}
}