parent 833e0f8ecf
commit 9f4b93fd5e
@@ -40,9 +40,3 @@ dependencyLicenses {
   mapping from: /lucene-.*/, to: 'lucene'
 }
 
-// Muted: https://github.com/elastic/elasticsearch/issues/35173
-integTestRunner {
-  systemProperty 'tests.rest.blacklist',
-      'analysis_icu/10_basic/Normalization with a UnicodeSet Filter,' +
-      'analysis_icu/10_basic/Normalization with a CamcelCase UnicodeSet Filter'
-}
@@ -38,8 +38,10 @@ import org.elasticsearch.index.IndexSettings;
  * <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
  */
 public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
 
+    private static final DeprecationLogger deprecationLogger =
+        new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class));
+
     private final Normalizer2 normalizer;
 
     public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
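
The hunk above only adds the DeprecationLogger field; the constructor logic that actually reads the setting falls outside the captured context. A minimal sketch of how the deprecated camelCase key can be resolved against the new snake_case one, assuming the Settings and DeprecationLogger APIs already visible in this commit (the helper class and method names below are illustrative, not the committed code):

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;

// Hypothetical helper, not the committed constructor body: prefer the new
// snake_case key and emit a deprecation warning when only the old key is set.
final class UnicodeSetFilterSetting {
    private static final DeprecationLogger deprecationLogger =
        new DeprecationLogger(LogManager.getLogger(UnicodeSetFilterSetting.class));

    static String resolve(Settings settings) {
        String value = settings.get("unicode_set_filter");
        if (value == null && settings.get("unicodeSetFilter") != null) {
            // Same message the REST test below asserts under `warnings:`.
            deprecationLogger.deprecated(
                "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]");
            value = settings.get("unicodeSetFilter");
        }
        return value;
    }
}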
@@ -46,7 +46,7 @@
     - match: { tokens.1.token: bar }
     - match: { tokens.2.token: resume }
 ---
-"Normalization with a UnicodeSet Filter":
+"Normalization with unicode_set_filter":
     - do:
         indices.create:
             index: test
@@ -70,31 +70,42 @@
             index: test
             body:
                 char_filter: ["charfilter_icu_normalizer"]
-                tokenizer: keyword
+                tokenizer: standard
                 text: charfilter Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match: { tokens.0.token: charfilter föo bâr ruß }
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: charfilter }
+    - match: { tokens.1.token: föo }
+    - match: { tokens.2.token: bâr }
+    - match: { tokens.3.token: ruß }
 
     - do:
         indices.analyze:
             index: test
             body:
-                tokenizer: keyword
+                tokenizer: standard
                 filter: ["tokenfilter_icu_normalizer"]
                 text: tokenfilter Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match: { tokens.0.token: tokenfilter föo Bâr ruß }
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: tokenfilter }
+    - match: { tokens.1.token: föo }
+    - match: { tokens.2.token: Bâr }
+    - match: { tokens.3.token: ruß }
 
     - do:
         indices.analyze:
             index: test
             body:
-                tokenizer: keyword
+                tokenizer: standard
                 filter: ["tokenfilter_icu_folding"]
                 text: icufolding Föo Bâr Ruß
-    - length: { tokens: 1 }
-    - match: { tokens.0.token: icufolding foo bâr russ }
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: icufolding }
+    - match: { tokens.1.token: foo }
+    - match: { tokens.2.token: bâr }
+    - match: { tokens.3.token: russ }
 
 ---
-"Normalization with a CamcelCase UnicodeSet Filter":
+"Normalization with deprecated unicodeSetFilter":
+    - skip:
+        version: " - 6.99.99"
+        reason: unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter
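
For context on the expected tokens: icu_normalizer applies NFKC case folding, and a UnicodeSet filter exempts the listed characters from normalization, which is why some assertions above keep ß or â intact while others fold them. A standalone ICU4J sketch of that mechanism; the sets configured in the test's index settings are only partially visible in this diff, so the "[^ß]" below is illustrative:

import com.ibm.icu.text.FilteredNormalizer2;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.UnicodeSet;

public class UnicodeSetFilterDemo {
    public static void main(String[] args) {
        // nfkc_cf, the normalization form behind icu_normalizer's default mode.
        Normalizer2 nfkcCf = Normalizer2.getNFKCCasefoldInstance();
        System.out.println(nfkcCf.normalize("Föo Bâr Ruß"));   // föo bâr russ

        // A UnicodeSet filter leaves the excluded characters untouched, which
        // is exactly what the unicode_set_filter setting configures.
        Normalizer2 filtered =
            new FilteredNormalizer2(nfkcCf, new UnicodeSet("[^ß]").freeze());
        System.out.println(filtered.normalize("Föo Bâr Ruß")); // föo bâr ruß
    }
}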
@@ -121,6 +132,8 @@
                     type: icu_folding
                     unicodeSetFilter: "[^â]"
     - do:
+        warnings:
+            - "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
         indices.analyze:
             index: test
             body:
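
The `warnings:` block added here makes the YAML test runner require that the response carried exactly this deprecation warning (delivered via the HTTP Warning header). The Java-side counterpart is ESTestCase#assertWarnings; a hypothetical unit-test sketch, not part of this commit:

// Inside a test class extending org.elasticsearch.test.ESTestCase.
public void testUnicodeSetFilterIsDeprecated() throws Exception {
    Settings settings = Settings.builder()
        .put("index.analysis.filter.my_folding.type", "icu_folding")
        .put("index.analysis.filter.my_folding.unicodeSetFilter", "[^â]") // deprecated key
        .build();
    // ... build the analysis components from these settings (omitted) ...
    assertWarnings("[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]");
}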
@@ -132,27 +145,4 @@
     - match: { tokens.1.token: föo }
     - match: { tokens.2.token: bâr }
     - match: { tokens.3.token: ruß }
-    - do:
-        indices.analyze:
-            index: test
-            body:
-                tokenizer: standard
-                filter: ["tokenfilter_icu_normalizer"]
-                text: tokenfilter Föo Bâr Ruß
-    - length: { tokens: 4 }
-    - match: { tokens.0.token: tokenfilter }
-    - match: { tokens.1.token: föo }
-    - match: { tokens.2.token: Bâr }
-    - match: { tokens.3.token: ruß }
-    - do:
-        indices.analyze:
-            index: test
-            body:
-                tokenizer: standard
-                filter: ["tokenfilter_icu_folding"]
-                text: icufolding Föo Bâr Ruß
-    - length: { tokens: 4 }
-    - match: { tokens.0.token: icufolding }
-    - match: { tokens.1.token: foo }
-    - match: { tokens.2.token: bâr }
-    - match: { tokens.3.token: russ }