From 5cf142f972db9a658d768ba3eac42c29916545aa Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 13 Jul 2021 23:11:18 -0400 Subject: [PATCH] LUCENE-5595: re-enable TestICUNormalizer2CharFilter random test, splitting by mode (#211) Re-enable the randomized testing here, but with a separate test for each mode rather than all in one method. This gives better testing and makes failures easier to debug. --- .../icu/TestICUNormalizer2CharFilter.java | 69 +++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java index 79b1dfe68f6..9026701b70e 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java @@ -321,9 +321,47 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { 16); } - @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-5595") - public void testRandomStrings() throws IOException { - // nfkc_cf + public void testRandomStringsNFC() throws IOException { + Analyzer a = + new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, false)); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + return new ICUNormalizer2CharFilter( + reader, Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE)); + } + }; + checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER); + // huge strings + checkRandomData(random(), a, 25 * RANDOM_MULTIPLIER, 8192); + a.close(); + } + + public void testRandomStringsNFKC() throws IOException { + Analyzer a = + new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String 
fieldName) { + return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, false)); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + return new ICUNormalizer2CharFilter( + reader, Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE)); + } + }; + checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER); + // huge strings + checkRandomData(random(), a, 25 * RANDOM_MULTIPLIER, 8192); + a.close(); + } + + public void testRandomStringsNFKC_CF() throws IOException { Analyzer a = new Analyzer() { @Override @@ -341,9 +379,30 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { // huge strings checkRandomData(random(), a, 25 * RANDOM_MULTIPLIER, 8192); a.close(); + } - // nfkd - a = + public void testRandomStringsNFD() throws IOException { + Analyzer a = + new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, false)); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + return new ICUNormalizer2CharFilter( + reader, Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)); + } + }; + checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER); + // huge strings + checkRandomData(random(), a, 25 * RANDOM_MULTIPLIER, 8192); + a.close(); + } + + public void testRandomStringsNFKD() throws IOException { + Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) {