From 3a8ed5e8ed12d4d3a166eff0e5a978021c764ab7 Mon Sep 17 00:00:00 2001
From: Dawid Weiss
Date: Thu, 30 Jan 2020 13:45:15 +0100
Subject: [PATCH] LUCENE-9134: add python-based regeneration of
 HTMLCharacterEntities.jflex inside jflexHTMLStripCharFilter.

---
 gradle/generation/jflex.gradle | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/gradle/generation/jflex.gradle b/gradle/generation/jflex.gradle
index e38a28c9633..9bdbc2c9e33 100644
--- a/gradle/generation/jflex.gradle
+++ b/gradle/generation/jflex.gradle
@@ -114,6 +114,7 @@ configure(project(":lucene:core")) {
 }
 
 configure(project(":lucene:analysis:common")) {
+
   task jflexUAX29URLEmailTokenizerImpl(type: JFlexTask) {
     description "Regenerate UAX29URLEmailTokenizerImpl.java"
     group "generation"
@@ -135,4 +136,42 @@ configure(project(":lucene:analysis:common")) {
       )
     }
   }
+
+  // Regenerates HTMLStripCharFilter.java from HTMLStripCharFilter.jflex.
+  // Before the JFlex run, HTMLCharacterEntities.jflex is rewritten from the
+  // standard output of htmlentity.py (run with the "python" executable).
+  task jflexHTMLStripCharFilter(type: JFlexTask) {
+    description "Regenerate HTMLStripCharFilter.java"
+    group "generation"
+
+    jflexFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex')
+    skeleton = project(":lucene:core").file("src/data/jflex/skeleton.default")
+
+    doFirst {
+      // Regenerate HTMLCharacterEntities.jflex first.
+      def target = file('src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex')
+
+      // Capture the script's output in memory and overwrite the target file
+      // only after the script has finished: exec throws on a non-zero exit
+      // code, so a failed run no longer leaves a truncated file behind.
+      def generated = new ByteArrayOutputStream()
+      project.exec {
+        executable = "python"
+        workingDir = target.parentFile
+        standardOutput = generated
+        args += [
+          "-B", // don't write any bytecode cache
+          "htmlentity.py"
+        ]
+      }
+      target.bytes = generated.toByteArray()
+
+      // Rewrite the generated file with LF line endings.
+      project.ant.fixcrlf(
+        file: target,
+        encoding: "UTF-8",
+        eol: "lf"
+      )
+    }
+  }
 }