From 2e615fa370ceaa254480e08e64531f985a291b0d Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Mon, 27 Mar 2017 23:53:55 -0400 Subject: [PATCH] SOLR-10344: Update Solr default/example and test configs to use WordDelimiterGraphFilterFactory --- solr/CHANGES.txt | 2 + .../solr/collection1/conf/schema.xml | 23 +++++-- .../solr/collection1/conf/synonyms.txt | 2 +- .../conf/dataimport-schema-no-unique-key.xml | 7 +- .../collection1/conf/dataimport-schema.xml | 17 +++-- .../conf/dataimport-solr_id-schema.xml | 23 +++++-- .../solr/collection1/conf/schema.xml | 40 ++++++++--- .../solr/collection1/conf/synonyms.txt | 2 +- .../uima/solr/collection1/conf/schema.xml | 37 +++++++--- .../uima/solr/collection1/conf/synonyms.txt | 2 +- .../uima/uima-tokenizers-schema.xml | 34 +++++++--- .../conf/schema-HighlighterMaxOffsetTest.xml | 5 +- .../conf/schema-copyfield-test.xml | 29 +++++--- .../collection1/conf/schema-docValuesJoin.xml | 5 +- .../solr/collection1/conf/schema-folding.xml | 2 +- .../solr/collection1/conf/schema-hash.xml | 59 +++++++++++----- .../collection1/conf/schema-psuedo-fields.xml | 5 +- .../conf/schema-required-fields.xml | 18 +++-- .../solr/collection1/conf/schema-rest.xml | 63 +++++++++++------ .../conf/schema-single-dynamic-copy-field.xml | 63 +++++++++++------ .../solr/collection1/conf/schema-sql.xml | 59 +++++++++++----- .../solr/collection1/conf/schema-trie.xml | 23 +++++-- .../solr/collection1/conf/schema.xml | 58 ++++++++++++---- .../solr/collection1/conf/schema11.xml | 27 ++++++-- .../solr/collection1/conf/schema12.xml | 63 +++++++++++------ .../solr/collection1/conf/schema15.xml | 63 +++++++++++------ .../solr/collection1/conf/schema_latest.xml | 27 ++++++-- .../solr/collection1/conf/schemasurround.xml | 68 +++++++++++++------ .../solr/collection1/conf/synonyms.txt | 2 +- .../solr/collection1/conf/wdftypes.txt | 2 +- .../configsets/cloud-dynamic/conf/schema.xml | 35 +++++++--- .../configsets/doc-expiry/conf/schema.xml | 35 +++++++--- 
.../org/apache/solr/ConvertedLegacyTest.java | 2 +- .../FieldAnalysisRequestHandlerTest.java | 14 ++-- .../solr/rest/schema/TestBulkSchemaAPI.java | 29 +++++++- .../example-DIH/solr/db/conf/managed-schema | 26 +++++-- .../example-DIH/solr/db/conf/synonyms.txt | 2 +- .../example-DIH/solr/mail/conf/managed-schema | 26 +++++-- .../example-DIH/solr/mail/conf/synonyms.txt | 2 +- .../example-DIH/solr/rss/conf/managed-schema | 31 ++++++--- .../example-DIH/solr/rss/conf/synonyms.txt | 2 +- .../example-DIH/solr/solr/conf/managed-schema | 26 +++++-- .../example-DIH/solr/solr/conf/synonyms.txt | 2 +- .../example-DIH/solr/tika/conf/managed-schema | 23 +++++-- solr/example/files/conf/managed-schema | 20 ++++-- solr/example/files/conf/synonyms.txt | 2 +- .../basic_configs/conf/managed-schema | 26 +++++-- .../basic_configs/conf/synonyms.txt | 2 +- .../conf/managed-schema | 26 +++++-- .../conf/synonyms.txt | 2 +- .../conf/managed-schema | 26 +++++-- .../conf/synonyms.txt | 2 +- .../solr/collection1/conf/schema-sql.xml | 59 +++++++++++----- .../solrj/solr/collection1/conf/schema.xml | 59 +++++++++++----- .../solr/configsets/streaming/conf/schema.xml | 56 ++++++++++----- .../solr/client/solrj/request/SchemaTest.java | 2 +- 56 files changed, 984 insertions(+), 383 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d0536a98261..22cee9a0dfa 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -157,6 +157,8 @@ Other Changes * SOLR-10352: bin/solr script now prints warning when available system entropy is lower than 300 (Esther Quansah via Ishan Chattopadhyaya) + +* SOLR-10344: Update Solr default/example and test configs to use WordDelimiterGraphFilterFactory. 
(Steve Rowe) ================== 6.5.0 ================== diff --git a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/schema.xml b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/schema.xml index 0c06a48adc2..02626a0eba4 100644 --- a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/schema.xml +++ b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/schema.xml @@ -155,7 +155,7 @@ - - @@ -198,11 +199,23 @@ - + - + + + + + + + + + + diff --git a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/synonyms.txt b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/synonyms.txt index b0e31cb7ec8..26d237a5943 100644 --- a/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/synonyms.txt +++ b/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/synonyms.txt @@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml index 068322e21b3..84d0dbe0a54 100644 --- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml +++ b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml @@ -158,7 +158,7 @@ - - + - - + - - + - + + + + + + + + + + - + - + + + + + + - + - + + + + + + @@ -233,15 +248,16 @@ - + - @@ -253,13 +269,14 @@ - + - @@ -269,13 +286,14 @@ - + - diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt index 0ef0e8daaba..461ed4df6e4 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/schema.xml b/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/schema.xml index 5e0273e5f29..89d44e6616d 100644 --- a/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/schema.xml +++ b/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/schema.xml @@ -191,7 +191,7 @@ - - + @@ -224,7 +224,7 @@ ignoreCase="true" expand="true" /> - @@ -240,23 +240,36 @@ --> - + - + + + + + + + + + @@ -271,10 +284,11 @@ - + @@ -282,7 +296,7 @@ ignoreCase="true" expand="true" /> - @@ -301,13 +315,14 @@ - + @@ -315,7 +330,7 @@ ignoreCase="true" expand="true" /> - diff --git a/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/synonyms.txt b/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/synonyms.txt index b0e31cb7ec8..26d237a5943 100644 --- a/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/synonyms.txt +++ b/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/synonyms.txt @@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml index 0007be79d2b..229d69bd586 100644 --- a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml +++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml @@ -191,7 +191,7 @@ - + + - @@ -235,19 +237,29 @@ --> - + - + + + + + + + + @@ -277,14 +289,15 @@ positionIncrementGap="100"> - + - @@ -301,17 +314,18 @@ positionIncrementGap="100"> - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml index 808453c1766..122d4ce8a35 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-HighlighterMaxOffsetTest.xml @@ -67,16 +67,17 @@ Test for HighlighterMaxOffsetTest which requires the use of ReversedWildcardFilt - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml index 47ddeb2aeff..a9df7f80c9b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml @@ -60,17 +60,25 @@ - + - + - + + + + + + @@ -229,15 +237,16 @@ - + - @@ -249,13 +258,14 @@ - + - @@ -265,13 +275,14 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesJoin.xml b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesJoin.xml index 1d559bbf7ef..baed872dd7f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-docValuesJoin.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-docValuesJoin.xml @@ -42,11 +42,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -55,7 +56,7 @@ 
ignoreCase="true" words="stopwords.txt" /> - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml b/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml index cc9cae3f553..2b0bf32139a 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml @@ -47,7 +47,7 @@ - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml index b1324738933..6a7987cd551 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml @@ -72,26 +72,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -191,9 +206,16 @@ - + - + + + + + + @@ -265,15 +287,16 @@ - + - @@ -285,15 +308,16 @@ - + - @@ -304,8 +328,9 @@ - + @@ -318,13 +343,14 @@ - + - @@ -334,13 +360,14 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml b/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml index 20f2d2dd299..8491ce820a7 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml @@ -50,11 +50,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -63,7 +64,7 @@ ignoreCase="true" words="stopwords.txt" /> - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml index d3a78185507..c92b90156ac 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml @@ -60,9 +60,10 @@ - + @@ -219,15 +220,16 @@ - + - @@ -239,13 +241,14 @@ - + - @@ -255,13 +258,14 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml 
b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml index 8bd603bc6ca..7d9bf02bc42 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml @@ -45,26 +45,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -90,11 +105,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -103,7 +119,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -153,11 +169,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -166,7 +183,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -300,15 +317,16 @@ - + - @@ -320,16 +338,17 @@ - + - @@ -341,9 +360,10 @@ - + @@ -356,13 +376,14 @@ - + - @@ -372,13 +393,14 @@ - + - @@ -401,16 +423,17 @@ --> - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-single-dynamic-copy-field.xml b/solr/core/src/test-files/solr/collection1/conf/schema-single-dynamic-copy-field.xml index f7cbf6dfd6c..72dc723b66b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-single-dynamic-copy-field.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-single-dynamic-copy-field.xml @@ -45,26 +45,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -90,11 +105,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -103,7 +119,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -153,11 +169,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -166,7 +183,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -300,15 +317,16 @@ - + - @@ -320,16 +338,17 @@ - + - @@ -341,9 +360,10 @@ - + @@ -356,13 +376,14 @@ - + - @@ -372,13 +393,14 @@ - + - @@ -401,16 +423,17 @@ --> - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml index 818a6c8624d..dbaac576533 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml @@ -72,26 +72,41 @@ - + - + - + + + + + + - + - + + 
+ + + + @@ -191,9 +206,16 @@ - + - + + + + + + @@ -265,15 +287,16 @@ - + - @@ -285,15 +308,16 @@ - + - @@ -304,8 +328,9 @@ - + @@ -318,13 +343,14 @@ - + - @@ -334,13 +360,14 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-trie.xml b/solr/core/src/test-files/solr/collection1/conf/schema-trie.xml index 1df0a0fc252..375036f0fe3 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-trie.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-trie.xml @@ -151,7 +151,7 @@ - - + - @@ -189,11 +190,23 @@ - + - + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index 6f5eddcaddf..23ac3260078 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -55,26 +55,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -196,7 +211,15 @@ - + + + + + + + + - + - @@ -294,16 +318,17 @@ - + - @@ -315,9 +340,10 @@ - + @@ -330,13 +356,14 @@ - + - @@ -346,13 +373,14 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml index 7591c96d7c5..db0770baf3b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml @@ -147,7 +147,7 @@ - - + + - + @@ -183,11 +184,22 @@ - + - + + + + + + + + + + + + @@ -417,17 +429,18 @@ valued. 
--> --> - + + - + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml index 85774408763..db913771f8c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml @@ -54,26 +54,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -99,11 +114,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -112,7 +128,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -129,11 +145,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -142,7 +159,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -270,15 +287,16 @@ - + - @@ -290,16 +308,17 @@ - + - @@ -311,9 +330,10 @@ - + @@ -326,13 +346,14 @@ - + - @@ -342,13 +363,14 @@ - + - @@ -371,16 +393,17 @@ --> - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml index e2c14f0f76b..8fb8d4410b2 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml @@ -46,26 +46,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -91,11 +106,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -104,7 +120,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -121,11 +137,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -134,7 +151,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -268,15 +285,16 @@ - + - @@ -288,16 +306,17 @@ - + - @@ -309,9 +328,10 @@ - + @@ -324,13 +344,14 @@ - + - @@ -340,13 +361,14 @@ - + - @@ -369,16 +391,17 @@ --> - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml index dfeac32c167..1163ab5a01d 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml @@ -541,7 +541,7 @@ - + - + possible with 
WordDelimiterGraphFilter in conjuncton with stemming. --> + + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml index e22eb2bdc9f..30719685672 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml @@ -47,26 +47,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -92,11 +107,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -105,7 +121,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -122,11 +138,12 @@ ignoreCase="true" words="stopwords.txt" /> - + @@ -135,7 +152,7 @@ ignoreCase="true" words="stopwords.txt" /> - @@ -263,15 +280,16 @@ - + - @@ -283,16 +301,17 @@ - + - @@ -304,9 +323,10 @@ - + @@ -319,13 +339,14 @@ - + - @@ -335,13 +356,14 @@ - + - @@ -364,16 +386,17 @@ --> - + - @@ -384,15 +407,16 @@ - + - diff --git a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt index 340abd7847c..f64cb724a57 100644 --- a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt +++ b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt @@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/core/src/test-files/solr/collection1/conf/wdftypes.txt b/solr/core/src/test-files/solr/collection1/conf/wdftypes.txt index 7378b0802e7..9a453c8da9f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/wdftypes.txt +++ b/solr/core/src/test-files/solr/collection1/conf/wdftypes.txt @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -# A customized type mapping for WordDelimiterFilterFactory +# A customized type mapping for WordDelimiterGraphFilterFactory # the allowable types are: LOWER, UPPER, ALPHA, DIGIT, ALPHANUM, SUBWORD_DELIM # # the default for any character without a mapping is always computed from diff --git a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml index af201c0364b..d15c19908ca 100644 --- a/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml +++ b/solr/core/src/test-files/solr/configsets/cloud-dynamic/conf/schema.xml @@ -45,26 +45,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -86,14 +101,15 @@ - + - @@ -111,14 +127,15 @@ - + - diff --git a/solr/core/src/test-files/solr/configsets/doc-expiry/conf/schema.xml b/solr/core/src/test-files/solr/configsets/doc-expiry/conf/schema.xml index 8400fe81d65..c622eba8a9f 100644 --- a/solr/core/src/test-files/solr/configsets/doc-expiry/conf/schema.xml +++ b/solr/core/src/test-files/solr/configsets/doc-expiry/conf/schema.xml @@ -45,26 +45,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -86,14 +101,15 @@ - + - @@ -111,14 +127,15 @@ - + - diff --git a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java index 767b811964d..bf7925a318d 100644 --- a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java +++ b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java @@ -872,7 
+872,7 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 { ); - // intra-word delimiter testing (WordDelimiterFilter) + // intra-word delimiter testing (WordDelimiterGraphFilter) assertU("42foo bar"); assertU(""); diff --git a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java index d2ef5553333..fc0f6bedbed 100644 --- a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java @@ -382,7 +382,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB } @Test - public void testPositionHistoryWithWDF() throws Exception { + public void testPositionHistoryWithWDGF() throws Exception { FieldAnalysisRequest request = new FieldAnalysisRequest(); request.addFieldType("skutype1"); @@ -407,12 +407,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, new int[]{2}, null, false)); assertToken(tokenList.get(2), new TokenInfo("a", null, "word", 12, 13, 3, new int[]{3}, null, false)); assertToken(tokenList.get(3), new TokenInfo("Test", null, "word", 14, 18, 4, new int[]{4}, null, false)); - tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter"); - assertNotNull("Expcting WordDelimiterFilter analysis breakdown", tokenList); + tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter"); + assertNotNull("Expcting WordDelimiterGraphFilter analysis breakdown", tokenList); assertEquals(6, tokenList.size()); assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2}, null, false)); - assertToken(tokenList.get(2), new 
TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2}, null, false)); + assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2}, null, false)); assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false)); assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false)); assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false)); @@ -420,8 +420,8 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList); assertEquals(6, tokenList.size()); assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false)); - assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2,2}, null, false)); - assertToken(tokenList.get(2), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2,2}, null, false)); + assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2,2}, null, false)); + assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2,2}, null, false)); assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3,3}, null, false)); assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4,4}, null, false)); assertToken(tokenList.get(5), new TokenInfo("test", null, "word", 14, 18, 5, new int[]{4,5,5}, null, false)); diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java index 166d1fc183c..9f379677098 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java +++ 
b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java @@ -396,7 +396,7 @@ public class TestBulkSchemaAPI extends RestTestBase { " 'name' : 'myNewTxtField',\n" + " 'class':'solr.TextField',\n" + " 'positionIncrementGap':'100',\n" + - " 'analyzer' : {\n" + + " 'indexAnalyzer' : {\n" + " 'charFilters':[\n" + " {\n" + " 'class':'solr.PatternReplaceCharFilterFactory',\n" + @@ -407,7 +407,32 @@ public class TestBulkSchemaAPI extends RestTestBase { " 'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'},\n" + " 'filters':[\n" + " {\n" + - " 'class':'solr.WordDelimiterFilterFactory',\n" + + " 'class':'solr.WordDelimiterGraphFilterFactory',\n" + + " 'preserveOriginal':'0'\n" + + " },\n" + + " {\n" + + " 'class':'solr.StopFilterFactory',\n" + + " 'words':'stopwords.txt',\n" + + " 'ignoreCase':'true'\n" + + " },\n" + + " {'class':'solr.LowerCaseFilterFactory'},\n" + + " {'class':'solr.ASCIIFoldingFilterFactory'},\n" + + " {'class':'solr.KStemFilterFactory'},\n" + + " {'class':'solr.FlattenGraphFilterFactory'}\n" + + " ]\n" + + " },\n" + + " 'queryAnalyzer' : {\n" + + " 'charFilters':[\n" + + " {\n" + + " 'class':'solr.PatternReplaceCharFilterFactory',\n" + + " 'replacement':'$1$1',\n" + + " 'pattern':'([a-zA-Z])\\\\\\\\1+'\n" + + " }\n" + + " ],\n" + + " 'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'},\n" + + " 'filters':[\n" + + " {\n" + + " 'class':'solr.WordDelimiterGraphFilterFactory',\n" + " 'preserveOriginal':'0'\n" + " },\n" + " {\n" + diff --git a/solr/example/example-DIH/solr/db/conf/managed-schema b/solr/example/example-DIH/solr/db/conf/managed-schema index eead56f7b07..1a1012fe850 100644 --- a/solr/example/example-DIH/solr/db/conf/managed-schema +++ b/solr/example/example-DIH/solr/db/conf/managed-schema @@ -500,7 +500,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. 
--> + + + + + + + + + + + + diff --git a/solr/example/example-DIH/solr/db/conf/synonyms.txt b/solr/example/example-DIH/solr/db/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/example/example-DIH/solr/db/conf/synonyms.txt +++ b/solr/example/example-DIH/solr/db/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. # Synonym mappings can be used for spelling correction too diff --git a/solr/example/example-DIH/solr/mail/conf/managed-schema b/solr/example/example-DIH/solr/mail/conf/managed-schema index 076f83f6e6d..016f105513c 100644 --- a/solr/example/example-DIH/solr/mail/conf/managed-schema +++ b/solr/example/example-DIH/solr/mail/conf/managed-schema @@ -419,7 +419,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. --> + + + + + + + + + + + + diff --git a/solr/example/example-DIH/solr/mail/conf/synonyms.txt b/solr/example/example-DIH/solr/mail/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/example/example-DIH/solr/mail/conf/synonyms.txt +++ b/solr/example/example-DIH/solr/mail/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/example/example-DIH/solr/rss/conf/managed-schema b/solr/example/example-DIH/solr/rss/conf/managed-schema index e35f49d42c1..2064c5895b0 100644 --- a/solr/example/example-DIH/solr/rss/conf/managed-schema +++ b/solr/example/example-DIH/solr/rss/conf/managed-schema @@ -242,18 +242,19 @@ --> - + + - + @@ -450,7 +451,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. --> + + + + + + + + + + + + diff --git a/solr/example/example-DIH/solr/rss/conf/synonyms.txt b/solr/example/example-DIH/solr/rss/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/example/example-DIH/solr/rss/conf/synonyms.txt +++ b/solr/example/example-DIH/solr/rss/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. # Synonym mappings can be used for spelling correction too diff --git a/solr/example/example-DIH/solr/solr/conf/managed-schema b/solr/example/example-DIH/solr/solr/conf/managed-schema index 6be0ad9a11e..04c85c08bb5 100644 --- a/solr/example/example-DIH/solr/solr/conf/managed-schema +++ b/solr/example/example-DIH/solr/solr/conf/managed-schema @@ -500,7 +500,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. 
--> + + + + + + + + + + + + diff --git a/solr/example/example-DIH/solr/solr/conf/synonyms.txt b/solr/example/example-DIH/solr/solr/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/example/example-DIH/solr/solr/conf/synonyms.txt +++ b/solr/example/example-DIH/solr/solr/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. # Synonym mappings can be used for spelling correction too diff --git a/solr/example/example-DIH/solr/tika/conf/managed-schema b/solr/example/example-DIH/solr/tika/conf/managed-schema index c4dccb208e9..58b2a80b6e1 100644 --- a/solr/example/example-DIH/solr/tika/conf/managed-schema +++ b/solr/example/example-DIH/solr/tika/conf/managed-schema @@ -353,7 +353,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. --> + + + + + + + + + diff --git a/solr/example/files/conf/managed-schema b/solr/example/files/conf/managed-schema index e936bcde4e3..ff209be05dc 100644 --- a/solr/example/files/conf/managed-schema +++ b/solr/example/files/conf/managed-schema @@ -145,27 +145,39 @@ - + + - + - + - + + + + + + + + + + + + diff --git a/solr/example/files/conf/synonyms.txt b/solr/example/files/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/example/files/conf/synonyms.txt +++ b/solr/example/files/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. 
# Synonym mappings can be used for spelling correction too diff --git a/solr/server/solr/configsets/basic_configs/conf/managed-schema b/solr/server/solr/configsets/basic_configs/conf/managed-schema index d7aacc48644..09aaae31d53 100644 --- a/solr/server/solr/configsets/basic_configs/conf/managed-schema +++ b/solr/server/solr/configsets/basic_configs/conf/managed-schema @@ -436,7 +436,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. --> + + + + + + + + + + + + diff --git a/solr/server/solr/configsets/basic_configs/conf/synonyms.txt b/solr/server/solr/configsets/basic_configs/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/server/solr/configsets/basic_configs/conf/synonyms.txt +++ b/solr/server/solr/configsets/basic_configs/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. # Synonym mappings can be used for spelling correction too diff --git a/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema b/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema index b716f9c6d06..0319eb05b45 100644 --- a/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema +++ b/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema @@ -435,7 +435,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjuncton with stemming. 
--> + + + + + + + + + + + + diff --git a/solr/server/solr/configsets/data_driven_schema_configs/conf/synonyms.txt b/solr/server/solr/configsets/data_driven_schema_configs/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/server/solr/configsets/data_driven_schema_configs/conf/synonyms.txt +++ b/solr/server/solr/configsets/data_driven_schema_configs/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words. # Synonym mappings can be used for spelling correction too diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema index 9caf3d6e2b0..17dadd48159 100644 --- a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema +++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema @@ -544,7 +544,7 @@ - + - + + possible with WordDelimiterGraphFilter in conjunction with stemming. --> + + + + + + + + + + + + diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt b/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt index 7f72128303b..eab4ee87537 100644 --- a/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt +++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt @@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa GB,gib,gigabyte,gigabytes MB,mib,megabyte,megabytes Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming #after us won't split it into two words.
# Synonym mappings can be used for spelling correction too diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml index 56bf625b967..96bbcd817e8 100644 --- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml +++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml @@ -72,26 +72,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -191,9 +206,16 @@ - + - + + + + + + @@ -265,15 +287,16 @@ - + - @@ -285,15 +308,16 @@ - + - @@ -304,8 +328,9 @@ - + @@ -318,13 +343,14 @@ - + - @@ -334,13 +360,14 @@ - + - diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml index 49f08b876d6..fb1a1ac6625 100644 --- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml @@ -47,26 +47,41 @@ - + - + - + + + + + + - + - + + + + + + @@ -166,9 +181,16 @@ - + - + + + + + + @@ -240,15 +262,16 @@ - + - @@ -260,15 +283,16 @@ - + - @@ -279,8 +303,9 @@ - + @@ -293,13 +318,14 @@ - + - @@ -309,13 +335,14 @@ - + - diff --git a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml index b61a2e93ee8..a3b6a4e6c8e 100644 --- a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml +++ b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml @@ -73,24 +73,37 @@ - + + - + - + + + + + + + - + - + + + + + + + @@ -179,9 +192,15 @@ - + - + + + + + + + @@ -252,14 +271,15 @@ - + + - + @@ -270,14 +290,15 @@ - + + - + @@ -287,7 +308,8 @@ - + + @@ -300,12 +322,13 @@ - + + - + @@ -314,12 +337,13 @@ - + + - + diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/request/SchemaTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/request/SchemaTest.java index 
0826c27259a..3bbcff1c8d3 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/request/SchemaTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/request/SchemaTest.java @@ -529,7 +529,7 @@ public class SchemaTest extends RestTestBase { tokenizerAttributes.put("class", "solr.WhitespaceTokenizerFactory"); analyzerDefinition.setTokenizer(tokenizerAttributes); Map filterAttributes = new LinkedHashMap<>(); - filterAttributes.put("class", "solr.WordDelimiterFilterFactory"); + filterAttributes.put("class", "solr.WordDelimiterGraphFilterFactory"); filterAttributes.put("preserveOriginal", "0"); analyzerDefinition.setFilters(Collections.singletonList(filterAttributes)); fieldTypeDefinition.setAnalyzer(analyzerDefinition);