Implement source code regeneration for test-framework perl scripts (#11952)
commit 3f6410b738 (parent e78210b7f0)
@@ -156,6 +156,7 @@ apply from: file('gradle/generation/icu.gradle')
 apply from: file('gradle/generation/javacc.gradle')
 apply from: file('gradle/generation/forUtil.gradle')
 apply from: file('gradle/generation/antlr.gradle')
+apply from: file('gradle/generation/unicode-test-classes.gradle')
 
 apply from: file('gradle/datasets/external-datasets.gradle')
@@ -92,6 +92,7 @@ configure([
   project(":lucene:core"),
   project(":lucene:queryparser"),
   project(":lucene:expressions"),
+  project(":lucene:test-framework"),
 ]) {
   task regenerate() {
     description "Rerun any code or static data generation tasks."
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Regenerates test classes from unicode data.
+
+configure(project(":lucene:test-framework")) {
+  task generateEmojiTokenizationTestInternal() {
+    def unicodeVersion = "12.1"
+
+    def genDir = file("src/java/org/apache/lucene/tests/analysis/standard")
+    def genScript = file("${genDir}/generateEmojiTokenizationTest.pl")
+    def genOutput = file("${genDir}/EmojiTokenizationTestUnicode_${unicodeVersion.replace('.', '_')}.java")
+
+    description "Regenerate ${genOutput}"
+    group "generation"
+
+    inputs.file genScript
+    inputs.property "unicodeVersion", unicodeVersion
+    outputs.file genOutput
+
+    doLast {
+      quietExec {
+        workingDir genDir
+        executable project.externalTool("perl")
+        args = [
+          genScript,
+          "-v", unicodeVersion
+        ]
+      }
+    }
+  }
+
+  regenerate.dependsOn wrapWithPersistentChecksums(generateEmojiTokenizationTestInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
+
+  task generateJavaUnicodeWordBreakTestInternal() {
+    def unicodeVersion = "12.1"
+
+    def genDir = file("src/java/org/apache/lucene/tests/analysis/standard")
+    def genScript = file("${genDir}/generateJavaUnicodeWordBreakTest.pl")
+    def genOutput = file("${genDir}/WordBreakTestUnicode_${unicodeVersion.replace('.', '_')}.java")
+
+    description "Regenerate ${genOutput}"
+    group "generation"
+
+    inputs.file genScript
+    inputs.property "unicodeVersion", unicodeVersion
+    outputs.file genOutput
+
+    doLast {
+      quietExec {
+        workingDir genDir
+        executable project.externalTool("perl")
+        args = [
+          genScript,
+          "-v", unicodeVersion
+        ]
+      }
+    }
+  }
+
+  regenerate.dependsOn wrapWithPersistentChecksums(generateJavaUnicodeWordBreakTestInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
+}
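
Both *Internal tasks in the new gradle/generation/unicode-test-classes.gradle follow the same recipe: declare the perl script and the unicode version as inputs and the generated .java file as the output, then shell out to perl via quietExec. Roughly, each doLast action amounts to the standalone Java sketch below (the class name and error handling are illustrative only; in the build this runs through the regenerate task, not by hand):

    // Illustrative standalone equivalent of generateEmojiTokenizationTestInternal
    // (hypothetical class, not part of the commit): run the perl generator in its
    // source directory and verify the expected file was (re)written.
    import java.io.File;
    import java.io.IOException;

    public class RegenerateSketch {
      public static void main(String[] args) throws IOException, InterruptedException {
        String unicodeVersion = "12.1";
        File genDir =
            new File("lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard");
        File genOutput =
            new File(genDir,
                "EmojiTokenizationTestUnicode_" + unicodeVersion.replace('.', '_') + ".java");

        // Mirrors: executable perl, args = [genScript, "-v", unicodeVersion], workingDir genDir.
        Process p =
            new ProcessBuilder("perl", "generateEmojiTokenizationTest.pl", "-v", unicodeVersion)
                .directory(genDir)
                .inheritIO()
                .start();

        if (p.waitFor() != 0 || !genOutput.isFile()) {
          throw new IOException("regeneration failed; expected output: " + genOutput);
        }
      }
    }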
@@ -113,6 +113,8 @@ allprojects {
             "code: ${result.exitValue}, " +
             "output at: ${outputFile} (and logged above).")
       }
+    } else {
+      logger.info(new String(outputFile.bytes))
     }
 
     return result
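
The added else branch changes quietExec's success path: output captured to ${outputFile} is now replayed through logger.info instead of being dropped. A minimal sketch of that capture-then-replay pattern, with java.util.logging standing in for Gradle's logger and all names hypothetical:

    // Hypothetical sketch of the quietExec behavior patched above: buffer the
    // subprocess output to a file, fail loudly with the log location, and on
    // success still replay the captured output at INFO level.
    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.util.logging.Logger;

    public class QuietExecSketch {
      private static final Logger LOGGER = Logger.getLogger(QuietExecSketch.class.getName());

      static void run(File outputFile, String... command)
          throws IOException, InterruptedException {
        Process p =
            new ProcessBuilder(command)
                .redirectErrorStream(true)  // merge stderr into stdout
                .redirectOutput(outputFile) // capture everything quietly
                .start();
        int code = p.waitFor();
        if (code != 0) {
          throw new IOException("Process failed, code: " + code + ", output at: " + outputFile);
        } else {
          LOGGER.info(new String(Files.readAllBytes(outputFile.toPath())));
        }
      }
    }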
@@ -678,8 +678,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_12_1_0 wordBreakTest = new WordBreakTestUnicode_12_1_0();
-    wordBreakTest.test(a);
+    WordBreakTestUnicode_12_1_0.test(a);
   }
 
   public void testSupplementary() throws Exception {
@@ -852,8 +851,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeEmojiTests() throws Exception {
-    EmojiTokenizationTestUnicode_12_1 emojiTest = new EmojiTokenizationTestUnicode_12_1();
-    emojiTest.test(a);
+    EmojiTokenizationTestUnicode_12_1.test(a);
   }
 
   /** blast some random strings through the analyzer */
@@ -463,8 +463,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_12_1_0 wordBreakTest = new WordBreakTestUnicode_12_1_0();
-    wordBreakTest.test(a);
+    WordBreakTestUnicode_12_1_0.test(a);
   }
 
   public void testSupplementary() throws Exception {
@@ -637,8 +636,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeEmojiTests() throws Exception {
-    EmojiTokenizationTestUnicode_12_1 emojiTest = new EmojiTokenizationTestUnicode_12_1();
-    emojiTest.test(a);
+    EmojiTokenizationTestUnicode_12_1.test(a);
  }
 
   /** blast some random strings through the analyzer */
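
All four call sites shrink the same way: the generated classes stop being @Ignore'd JUnit classes extending BaseTokenStreamTestCase and become final utilities with a static test(Analyzer) entry point, so callers no longer construct throwaway instances. A sketch of a caller under the new shape (the wrapper class and main method are hypothetical; the bare-StandardTokenizer analyzer mirrors the "a" these tests use):

    // Hypothetical caller of the regenerated helpers; both are now static,
    // so no instance of the generated class is created.
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.tests.analysis.standard.EmojiTokenizationTestUnicode_12_1;
    import org.apache.lucene.tests.analysis.standard.WordBreakTestUnicode_12_1_0;

    public class UnicodeConformanceSketch {
      public static void main(String[] args) throws Exception {
        // An analyzer around a plain StandardTokenizer, with no filters that
        // would disturb the expected token strings.
        Analyzer a =
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new StandardTokenizer();
                return new TokenStreamComponents(tokenizer);
              }
            };
        WordBreakTestUnicode_12_1_0.test(a);        // UAX#29 word-break conformance
        EmojiTokenizationTestUnicode_12_1.test(a);  // emoji tokenization conformance
      }
    }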
@@ -0,0 +1,5 @@
+{
+  "lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/EmojiTokenizationTestUnicode_12_1.java": "22e03ada47168b0986220c57260cfaf8e6e12e16",
+  "lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/generateEmojiTokenizationTest.pl": "a21d8aea5d2c30fb47b2bf9b24e20ddf605de46d",
+  "property:unicodeVersion": "12.1"
+}
@@ -0,0 +1,5 @@
+{
+  "lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/WordBreakTestUnicode_12_1.java": "--",
+  "lucene/test-framework/src/java/org/apache/lucene/tests/analysis/standard/generateJavaUnicodeWordBreakTest.pl": "1dd7148f4514976503a2be2e00be75c20ce784fb",
+  "property:unicodeVersion": "12.1"
+}
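
These JSON files are the persistent checksums that wrapWithPersistentChecksums maintains: a SHA-1 digest per tracked file plus pinned "property:" entries, committed alongside the sources so the build can detect drift in a generator or its output without rerunning perl. A minimal sketch of that gating idea (illustrative only, not Lucene's actual gradle implementation; assumes Java 17 for HexFormat):

    // Minimal sketch of a persistent-checksum gate: regeneration can be
    // skipped while every recorded SHA-1 still matches the file on disk.
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.security.MessageDigest;
    import java.util.HexFormat;
    import java.util.Map;

    public class ChecksumGateSketch {
      static String sha1(Path file) throws Exception {
        byte[] digest = MessageDigest.getInstance("SHA-1").digest(Files.readAllBytes(file));
        return HexFormat.of().formatHex(digest);
      }

      /** True when any tracked file drifted from its recorded digest. */
      static boolean needsRegeneration(Map<String, String> recorded) throws Exception {
        for (Map.Entry<String, String> e : recorded.entrySet()) {
          if (e.getKey().startsWith("property:")) {
            continue; // version pins like "property:unicodeVersion" are compared separately
          }
          if (!e.getValue().equals(sha1(Path.of(e.getKey())))) {
            return true;
          }
        }
        return false;
      }
    }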
@@ -19,30 +19,27 @@ package org.apache.lucene.tests.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
 
 /**
- * This class was automatically generated by generateEmojiTokenizationTest.pl from:
- * http://www.unicode.org/Public/emoji/12.1/emoji-test.txt
- *
- * <p>emoji-test.txt contains emoji char sequences, which are represented as tokenization tests in
- * this class.
+ * This class was automatically generated by generateEmojiTokenizationTest.pl. from: <a
+ * href="http://www.unicode.org/Public/emoji/12.1/emoji-test.txt"><code>
+ * http://www.unicode.org/Public/emoji/12.1/emoji-test.txt</code></a>
  */
-@Ignore
-public class EmojiTokenizationTestUnicode_12_1 extends BaseTokenStreamTestCase {
+public final class EmojiTokenizationTestUnicode_12_1 {
 
-  public void test(Analyzer analyzer) throws Exception {
-    for (int i = 0; i < tests.length; i += 2) {
-      String test = tests[i + 1];
+  public static void test(Analyzer analyzer) throws Exception {
+    for (int i = 0; i < TESTS.length; i += 2) {
+      String test = TESTS[i + 1];
       try {
-        assertAnalyzesTo(analyzer, test, new String[] {test}, new String[] {"<EMOJI>"});
+        BaseTokenStreamTestCase.assertAnalyzesTo(
+            analyzer, test, new String[] {test}, new String[] {"<EMOJI>"});
       } catch (Throwable t) {
-        throw new Exception("Failed to tokenize \"" + tests[i] + "\":", t);
+        throw new Exception("Failed to tokenize \"" + TESTS[i] + "\":", t);
       }
     }
   }
 
-  private String[] tests =
+  private static String[] TESTS =
       new String[] {
         "1F600 ; fully-qualified # 😀 E2.0 grinning face",
         "\uD83D\uDE00",
@@ -17,9 +17,9 @@
 
 package org.apache.lucene.tests.analysis.standard;
 
+import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
+
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
 
 /**
  * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl from:
@@ -38,10 +38,9 @@ import org.junit.Ignore;
  * Hebrew_Letter} \p{WordBreak = Katakana} \p{WordBreak = Numeric} \p{Extended_Pictographic} (From
  * http://www.unicode.org/Public/emoji/12.1/emoji-data.txt)
  */
-@Ignore
-public class WordBreakTestUnicode_12_1_0 extends BaseTokenStreamTestCase {
+public final class WordBreakTestUnicode_12_1_0 {
 
-  public void test(Analyzer analyzer) throws Exception {
+  public static void test(Analyzer analyzer) throws Exception {
     // ÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷
     // [0.3]
     assertAnalyzesTo(analyzer, "\u0001\u0001", new String[] {});
@@ -56,32 +56,26 @@ my $header =<<"__HEADER__";
 package org.apache.lucene.tests.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
+import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
 
 /**
- * This class was automatically generated by ${script_name}
- * from: ${url}
- *
- * emoji-test.txt contains emoji char sequences, which are represented as
- * tokenization tests in this class.
- *
+ * This class was automatically generated by ${script_name}.
+ * from: <a href="${url}"><code>${url}</code></a>
  */
-\@Ignore
-public class ${class_name} extends BaseTokenStreamTestCase {
+public final class ${class_name} {
 
-  public void test(Analyzer analyzer) throws Exception {
-    for (int i = 0 ; i < tests.length ; i += 2) {
-      String test = tests[i + 1];
+  public static void test(Analyzer analyzer) throws Exception {
+    for (int i = 0 ; i < TESTS.length ; i += 2) {
+      String test = TESTS[i + 1];
       try {
-        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+        BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
       } catch (Throwable t) {
-        throw new Exception("Failed to tokenize \\"" + tests[i] + "\\":", t);
+        throw new Exception("Failed to tokenize \\"" + TESTS[i] + "\\":", t);
       }
     }
   }
 
-  private String[] tests = new String[] {
+  private static String[] TESTS = new String[] {
 __HEADER__
 
 my @tests = split /\r?\n/, get_URL_content($url);
@@ -62,8 +62,7 @@ my $header =<<"__HEADER__";
 package org.apache.lucene.tests.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
+import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
 
 /**
  * This class was automatically generated by ${script_name}
@@ -85,10 +84,9 @@ import org.junit.Ignore;
  * \\p{WordBreak = Numeric}
  * \\p{Extended_Pictographic} (From $emoji_url)
  */
-\@Ignore
-public class ${class_name} extends BaseTokenStreamTestCase {
+public final class ${class_name} {
 
-  public void test(Analyzer analyzer) throws Exception {
+  public static void test(Analyzer analyzer) throws Exception {
 __HEADER__
 
 my $codepoints = [];