Implement source code regeneration for test-framework perl scripts (#11952)

Dawid Weiss 2022-11-19 23:40:45 +01:00 committed by GitHub
parent e78210b7f0
commit 3f6410b738
12 changed files with 122 additions and 48 deletions

View File

@@ -156,6 +156,7 @@ apply from: file('gradle/generation/icu.gradle')
apply from: file('gradle/generation/javacc.gradle')
apply from: file('gradle/generation/forUtil.gradle')
apply from: file('gradle/generation/antlr.gradle')
+ apply from: file('gradle/generation/unicode-test-classes.gradle')
apply from: file('gradle/datasets/external-datasets.gradle')

View File

@@ -92,6 +92,7 @@ configure([
project(":lucene:core"),
project(":lucene:queryparser"),
project(":lucene:expressions"),
project(":lucene:test-framework"),
]) {
task regenerate() {
description "Rerun any code or static data generation tasks."

View File

@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Regenerates test classes from unicode data.
configure(project(":lucene:test-framework")) {
task generateEmojiTokenizationTestInternal() {
def unicodeVersion = "12.1"
def genDir = file("src/java/org/apache/lucene/tests/analysis/standard")
def genScript = file("${genDir}/generateEmojiTokenizationTest.pl")
def genOutput = file("${genDir}/EmojiTokenizationTestUnicode_${unicodeVersion.replace('.', '_')}.java")
description "Regenerate ${genOutput}"
group "generation"
inputs.file genScript
inputs.property "unicodeVersion", unicodeVersion
outputs.file genOutput
doLast {
quietExec {
workingDir genDir
executable project.externalTool("perl")
args = [
genScript,
"-v", unicodeVersion
]
}
}
}
regenerate.dependsOn wrapWithPersistentChecksums(generateEmojiTokenizationTestInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
task generateJavaUnicodeWordBreakTestInternal() {
def unicodeVersion = "12.1"
def genDir = file("src/java/org/apache/lucene/tests/analysis/standard")
def genScript = file("${genDir}/generateJavaUnicodeWordBreakTest.pl")
def genOutput = file("${genDir}/WordBreakTestUnicode_${unicodeVersion.replace('.', '_')}.java")
description "Regenerate ${genOutput}"
group "generation"
inputs.file genScript
inputs.property "unicodeVersion", unicodeVersion
outputs.file genOutput
doLast {
quietExec {
workingDir genDir
executable project.externalTool("perl")
args = [
genScript,
"-v", unicodeVersion
]
}
}
}
regenerate.dependsOn wrapWithPersistentChecksums(generateJavaUnicodeWordBreakTestInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
}
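Usage sketch (not spelled out in this commit; the project/task path is inferred from the regenerate umbrella task wired up above):

  gradlew :lucene:test-framework:regenerate

This would rerun both Perl generators for the test-framework project. The wrapWithPersistentChecksums(...) wrapper presumably skips the work when the recorded script/output checksums and the unicodeVersion property (see the *.json files added below) are unchanged.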

View File

@@ -113,6 +113,8 @@ allprojects {
"code: ${result.exitValue}, " +
"output at: ${outputFile} (and logged above).")
}
+ } else {
+   logger.info(new String(outputFile.bytes))
}
return result

View File

@@ -678,8 +678,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_12_1_0 wordBreakTest = new WordBreakTestUnicode_12_1_0();
- wordBreakTest.test(a);
+ WordBreakTestUnicode_12_1_0.test(a);
}
public void testSupplementary() throws Exception {
@@ -852,8 +851,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
}
public void testUnicodeEmojiTests() throws Exception {
- EmojiTokenizationTestUnicode_12_1 emojiTest = new EmojiTokenizationTestUnicode_12_1();
- emojiTest.test(a);
+ EmojiTokenizationTestUnicode_12_1.test(a);
}
/** blast some random strings through the analyzer */

View File

@@ -463,8 +463,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_12_1_0 wordBreakTest = new WordBreakTestUnicode_12_1_0();
- wordBreakTest.test(a);
+ WordBreakTestUnicode_12_1_0.test(a);
}
public void testSupplementary() throws Exception {
@@ -637,8 +636,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
}
public void testUnicodeEmojiTests() throws Exception {
- EmojiTokenizationTestUnicode_12_1 emojiTest = new EmojiTokenizationTestUnicode_12_1();
- emojiTest.test(a);
+ EmojiTokenizationTestUnicode_12_1.test(a);
}
/** blast some random strings through the analyzer */

View File

@@ -0,0 +1,5 @@
{
"lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/EmojiTokenizationTestUnicode_12_1.java": "22e03ada47168b0986220c57260cfaf8e6e12e16",
"lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/generateEmojiTokenizationTest.pl": "a21d8aea5d2c30fb47b2bf9b24e20ddf605de46d",
"property:unicodeVersion": "12.1"
}

View File

@@ -0,0 +1,5 @@
{
"lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/WordBreakTestUnicode_12_1.java": "--",
"lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/generateJavaUnicodeWordBreakTest.pl": "1dd7148f4514976503a2be2e00be75c20ce784fb",
"property:unicodeVersion": "12.1"
}

View File

@@ -19,30 +19,27 @@ package org.apache.lucene.tests.analysis.standard;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
- import org.junit.Ignore;
/**
- * This class was automatically generated by generateEmojiTokenizationTest.pl from:
- * http://www.unicode.org/Public/emoji/12.1/emoji-test.txt
- *
- * <p>emoji-test.txt contains emoji char sequences, which are represented as tokenization tests in
- * this class.
+ * This class was automatically generated by generateEmojiTokenizationTest.pl. from: <a
+ * href="http://www.unicode.org/Public/emoji/12.1/emoji-test.txt"><code>
+ * http://www.unicode.org/Public/emoji/12.1/emoji-test.txt</code></a>
*/
- @Ignore
- public class EmojiTokenizationTestUnicode_12_1 extends BaseTokenStreamTestCase {
+ public final class EmojiTokenizationTestUnicode_12_1 {
- public void test(Analyzer analyzer) throws Exception {
- for (int i = 0; i < tests.length; i += 2) {
- String test = tests[i + 1];
+ public static void test(Analyzer analyzer) throws Exception {
+ for (int i = 0; i < TESTS.length; i += 2) {
+ String test = TESTS[i + 1];
try {
- assertAnalyzesTo(analyzer, test, new String[] {test}, new String[] {"<EMOJI>"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(
+     analyzer, test, new String[] {test}, new String[] {"<EMOJI>"});
} catch (Throwable t) {
- throw new Exception("Failed to tokenize \"" + tests[i] + "\":", t);
+ throw new Exception("Failed to tokenize \"" + TESTS[i] + "\":", t);
}
}
}
- private String[] tests =
+ private static String[] TESTS =
new String[] {
"1F600 ; fully-qualified # 😀 E2.0 grinning face",
"\uD83D\uDE00",

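For illustration, here is a minimal caller of the regenerated helper, mirroring the TestStandardAnalyzer and TestUAX29URLEmailTokenizer changes above. This is a hypothetical sketch, not code from the commit: the class name is made up, and the choice of StandardAnalyzer assumes its tokenizer emits <EMOJI> token types, which is what the generated assertions check.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.tests.analysis.standard.EmojiTokenizationTestUnicode_12_1;

// Hypothetical test class: the generated class is now a plain static utility,
// so callers bring their own test scaffolding and Analyzer.
public class TestEmojiTokenizationSketch extends BaseTokenStreamTestCase {
  public void testEmojiSequences() throws Exception {
    // StandardAnalyzer is only an example; any analyzer whose tokenizer
    // produces <EMOJI> token types should satisfy the generated assertions.
    try (Analyzer analyzer = new StandardAnalyzer()) {
      EmojiTokenizationTestUnicode_12_1.test(analyzer);
    }
  }
}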
View File

@@ -17,9 +17,9 @@
package org.apache.lucene.tests.analysis.standard;
+ import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
- import org.junit.Ignore;
/**
* This class was automatically generated by generateJavaUnicodeWordBreakTest.pl from:
@@ -38,10 +38,9 @@ import org.junit.Ignore;
* Hebrew_Letter} \p{WordBreak = Katakana} \p{WordBreak = Numeric} \p{Extended_Pictographic} (From
* http://www.unicode.org/Public/emoji/12.1/emoji-data.txt)
*/
- @Ignore
- public class WordBreakTestUnicode_12_1_0 extends BaseTokenStreamTestCase {
+ public final class WordBreakTestUnicode_12_1_0 {
- public void test(Analyzer analyzer) throws Exception {
+ public static void test(Analyzer analyzer) throws Exception {
// ÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷
// [0.3]
assertAnalyzesTo(analyzer, "\u0001\u0001", new String[] {});

View File

@@ -56,32 +56,26 @@ my $header =<<"__HEADER__";
package org.apache.lucene.tests.analysis.standard;
import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.BaseTokenStreamTestCase;
- import org.junit.Ignore;
+ import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
/**
- * This class was automatically generated by ${script_name}
- * from: ${url}
- *
- * emoji-test.txt contains emoji char sequences, which are represented as
- * tokenization tests in this class.
- *
+ * This class was automatically generated by ${script_name}.
+ * from: <a href="${url}"><code>${url}</code></a>
*/
- \@Ignore
- public class ${class_name} extends BaseTokenStreamTestCase {
+ public final class ${class_name} {
- public void test(Analyzer analyzer) throws Exception {
- for (int i = 0 ; i < tests.length ; i += 2) {
- String test = tests[i + 1];
+ public static void test(Analyzer analyzer) throws Exception {
+ for (int i = 0 ; i < TESTS.length ; i += 2) {
+ String test = TESTS[i + 1];
try {
- assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
} catch (Throwable t) {
- throw new Exception("Failed to tokenize \\"" + tests[i] + "\\":", t);
+ throw new Exception("Failed to tokenize \\"" + TESTS[i] + "\\":", t);
}
}
}
- private String[] tests = new String[] {
+ private static String[] TESTS = new String[] {
__HEADER__
my @tests = split /\r?\n/, get_URL_content($url);

View File

@@ -62,8 +62,7 @@ my $header =<<"__HEADER__";
package org.apache.lucene.tests.analysis.standard;
import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.BaseTokenStreamTestCase;
- import org.junit.Ignore;
+ import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
/**
* This class was automatically generated by ${script_name}
@@ -85,10 +84,9 @@ import org.junit.Ignore;
* \\p{WordBreak = Numeric}
* \\p{Extended_Pictographic} (From $emoji_url)
*/
- \@Ignore
- public class ${class_name} extends BaseTokenStreamTestCase {
+ public final class ${class_name} {
- public void test(Analyzer analyzer) throws Exception {
+ public static void test(Analyzer analyzer) throws Exception {
__HEADER__
my $codepoints = [];