mirror of https://github.com/apache/lucene.git
SOLR-10344: Update Solr default/example and test configs to use WordDelimiterGraphFilterFactory
This commit is contained in:
parent
cb20eae178
commit
b46b8278c6
|
@ -151,6 +151,8 @@ Other Changes
|
|||
|
||||
* SOLR-10352: bin/solr script now prints warning when available system entropy is lower than 300 (Esther Quansah via
|
||||
Ishan Chattopadhyaya)
|
||||
|
||||
* SOLR-10344: Update Solr default/example and test configs to use WordDelimiterGraphFilterFactory. (Steve Rowe)
|
||||
|
||||
================== 6.5.0 ==================
|
||||
|
||||
|
|
|
@ -155,7 +155,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
|
@ -174,18 +174,19 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -198,11 +199,23 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -158,7 +158,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
|
@ -172,18 +172,19 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -10,23 +10,32 @@
|
|||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
|
|
|
@ -156,7 +156,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
|
@ -170,18 +170,19 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -194,11 +195,23 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -55,26 +55,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -233,15 +248,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -253,13 +269,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -269,13 +286,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -191,7 +191,7 @@
|
|||
</fieldType>
|
||||
|
||||
<!--
|
||||
A text field that uses WordDelimiterFilter to enable splitting and
|
||||
A text field that uses WordDelimiterGraphFilter to enable splitting and
|
||||
matching of words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
|
||||
match a document containing "Wi-Fi". Synonyms and stopwords are
|
||||
|
@ -212,11 +212,11 @@
|
|||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
|
@ -224,7 +224,7 @@
|
|||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
@ -240,23 +240,36 @@
|
|||
-->
|
||||
<fieldType name="textTight" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="false" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
||||
<!--
|
||||
this filter can remove any duplicate tokens that appear at the
|
||||
same position - sometimes possible with WordDelimiterFilter in
|
||||
same position - sometimes possible with WordDelimiterGraphFilter in
|
||||
conjuncton with stemming.
|
||||
-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
|
||||
ignoreCase="true" expand="false" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
@ -271,10 +284,11 @@
|
|||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
|
@ -282,7 +296,7 @@
|
|||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
@ -301,13 +315,14 @@
|
|||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.ReversedWildcardFilterFactory"
|
||||
withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
|
||||
maxFractionAsterisk="0.33" />
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory" />
|
||||
|
@ -315,7 +330,7 @@
|
|||
ignoreCase="true" expand="true" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
|
|
|
@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -191,7 +191,7 @@
|
|||
</fieldType>
|
||||
|
||||
<!--
|
||||
A text field that uses WordDelimiterFilter to enable splitting and
|
||||
A text field that uses WordDelimiterGraphFilter to enable splitting and
|
||||
matching of words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars, so that a query of "wifi" or "wi fi" could
|
||||
match a document containing "Wi-Fi". Synonyms and stopwords are
|
||||
|
@ -210,16 +210,18 @@
|
|||
<!--
|
||||
Case insensitive stop word removal. add
|
||||
-->
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
@ -235,19 +237,29 @@
|
|||
-->
|
||||
<fieldType name="textTight" class="solr.TextField"
|
||||
positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
||||
<!--
|
||||
this filter can remove any duplicate tokens that appear at the
|
||||
same position - sometimes possible with WordDelimiterFilter in
|
||||
same position - sometimes possible with WordDelimiterGraphFilter in
|
||||
conjuncton with stemming.
|
||||
-->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
@ -277,14 +289,15 @@
|
|||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
@ -301,17 +314,18 @@
|
|||
positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory"
|
||||
withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2"
|
||||
maxFractionAsterisk="0.33"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
|
|
|
@ -67,16 +67,17 @@ Test for HighlighterMaxOffsetTest which requires the use of ReversedWildcardFilt
|
|||
<fieldType name="text_stx" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -60,17 +60,25 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -229,15 +237,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -249,13 +258,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -265,13 +275,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -42,11 +42,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -55,7 +56,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
</analyzer>
|
||||
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
|
|
|
@ -72,26 +72,41 @@
|
|||
<fieldtype name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldtype name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -191,9 +206,16 @@
|
|||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -265,15 +287,16 @@
|
|||
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -285,15 +308,16 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -304,8 +328,9 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -318,13 +343,14 @@
|
|||
<fieldtype name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -334,13 +360,14 @@
|
|||
<fieldtype name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -50,11 +50,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -63,7 +64,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -60,9 +60,10 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
@ -219,15 +220,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -239,13 +241,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -255,13 +258,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -45,26 +45,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -90,11 +105,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -103,7 +119,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -153,11 +169,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -166,7 +183,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -300,15 +317,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -320,16 +338,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -341,9 +360,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -356,13 +376,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -372,13 +393,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -401,16 +423,17 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -45,26 +45,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -90,11 +105,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -103,7 +119,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -153,11 +169,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -166,7 +183,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -300,15 +317,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -320,16 +338,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -341,9 +360,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -356,13 +376,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -372,13 +393,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -401,16 +423,17 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -72,26 +72,41 @@
|
|||
<fieldtype name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldtype name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -191,9 +206,16 @@
|
|||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -265,15 +287,16 @@
|
|||
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -285,15 +308,16 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -304,8 +328,9 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -318,13 +343,14 @@
|
|||
<fieldtype name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -334,13 +360,14 @@
|
|||
<fieldtype name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -151,7 +151,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
|
@ -165,18 +165,19 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -189,11 +190,23 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
|
|
@ -55,26 +55,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -196,7 +211,15 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
|
@ -274,15 +297,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -294,16 +318,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -315,9 +340,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -330,13 +356,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -346,13 +373,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -147,7 +147,7 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
|
@ -161,17 +161,18 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -183,11 +184,22 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
|
@ -417,17 +429,18 @@ valued. -->
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -54,26 +54,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -99,11 +114,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -112,7 +128,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -129,11 +145,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -142,7 +159,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -270,15 +287,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -290,16 +308,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -311,9 +330,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -326,13 +346,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -342,13 +363,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -371,16 +393,17 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -46,26 +46,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -91,11 +106,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -104,7 +120,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -121,11 +137,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -134,7 +151,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -268,15 +285,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -288,16 +306,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -309,9 +328,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -324,13 +344,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -340,13 +361,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -369,16 +391,17 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -541,7 +541,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -560,11 +560,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -573,7 +574,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -585,17 +586,31 @@
|
|||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100"
|
||||
autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -47,26 +47,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -92,11 +107,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -105,7 +121,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -122,11 +138,12 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -135,7 +152,7 @@
|
|||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -263,15 +280,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -283,16 +301,17 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -304,9 +323,10 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" protected="protwords.txt" splitOnNumerics="0"
|
||||
splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0"
|
||||
catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -319,13 +339,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -335,13 +356,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -364,16 +386,17 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -384,15 +407,16 @@
|
|||
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
|
|
|
@ -23,7 +23,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# A customized type mapping for WordDelimiterFilterFactory
|
||||
# A customized type mapping for WordDelimiterGraphFilterFactory
|
||||
# the allowable types are: LOWER, UPPER, ALPHA, DIGIT, ALPHANUM, SUBWORD_DELIM
|
||||
#
|
||||
# the default for any character without a mapping is always computed from
|
||||
|
|
|
@ -45,26 +45,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -86,14 +101,15 @@
|
|||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -111,14 +127,15 @@
|
|||
<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -45,26 +45,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -86,14 +101,15 @@
|
|||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -111,14 +127,15 @@
|
|||
<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
|
|
@ -872,7 +872,7 @@ public class ConvertedLegacyTest extends SolrTestCaseJ4 {
|
|||
);
|
||||
|
||||
|
||||
// intra-word delimiter testing (WordDelimiterFilter)
|
||||
// intra-word delimiter testing (WordDelimiterGraphFilter)
|
||||
|
||||
assertU("<add><doc><field name=\"id\">42</field><field name=\"subword\">foo bar</field></doc></add>");
|
||||
assertU("<commit/>");
|
||||
|
|
|
@ -382,7 +382,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testPositionHistoryWithWDF() throws Exception {
|
||||
public void testPositionHistoryWithWDGF() throws Exception {
|
||||
|
||||
FieldAnalysisRequest request = new FieldAnalysisRequest();
|
||||
request.addFieldType("skutype1");
|
||||
|
@ -407,12 +407,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, new int[]{2}, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("a", null, "word", 12, 13, 3, new int[]{3}, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("Test", null, "word", 14, 18, 4, new int[]{4}, null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter");
|
||||
assertNotNull("Expcting WordDelimiterFilter analysis breakdown", tokenList);
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter");
|
||||
assertNotNull("Expcting WordDelimiterGraphFilter analysis breakdown", tokenList);
|
||||
assertEquals(6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2}, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2}, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2}, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
|
||||
|
@ -420,8 +420,8 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2,2}, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2,2}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[]{2,2,2}, null, false));
|
||||
assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[]{2,2,2}, null, false));
|
||||
assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3,3}, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4,4}, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("test", null, "word", 14, 18, 5, new int[]{4,5,5}, null, false));
|
||||
|
|
|
@ -396,7 +396,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
" 'name' : 'myNewTxtField',\n" +
|
||||
" 'class':'solr.TextField',\n" +
|
||||
" 'positionIncrementGap':'100',\n" +
|
||||
" 'analyzer' : {\n" +
|
||||
" 'indexAnalyzer' : {\n" +
|
||||
" 'charFilters':[\n" +
|
||||
" {\n" +
|
||||
" 'class':'solr.PatternReplaceCharFilterFactory',\n" +
|
||||
|
@ -407,7 +407,32 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
" 'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'},\n" +
|
||||
" 'filters':[\n" +
|
||||
" {\n" +
|
||||
" 'class':'solr.WordDelimiterFilterFactory',\n" +
|
||||
" 'class':'solr.WordDelimiterGraphFilterFactory',\n" +
|
||||
" 'preserveOriginal':'0'\n" +
|
||||
" },\n" +
|
||||
" {\n" +
|
||||
" 'class':'solr.StopFilterFactory',\n" +
|
||||
" 'words':'stopwords.txt',\n" +
|
||||
" 'ignoreCase':'true'\n" +
|
||||
" },\n" +
|
||||
" {'class':'solr.LowerCaseFilterFactory'},\n" +
|
||||
" {'class':'solr.ASCIIFoldingFilterFactory'},\n" +
|
||||
" {'class':'solr.KStemFilterFactory'},\n" +
|
||||
" {'class':'solr.FlattenGraphFilterFactory'}\n" +
|
||||
" ]\n" +
|
||||
" },\n" +
|
||||
" 'queryAnalyzer' : {\n" +
|
||||
" 'charFilters':[\n" +
|
||||
" {\n" +
|
||||
" 'class':'solr.PatternReplaceCharFilterFactory',\n" +
|
||||
" 'replacement':'$1$1',\n" +
|
||||
" 'pattern':'([a-zA-Z])\\\\\\\\1+'\n" +
|
||||
" }\n" +
|
||||
" ],\n" +
|
||||
" 'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'},\n" +
|
||||
" 'filters':[\n" +
|
||||
" {\n" +
|
||||
" 'class':'solr.WordDelimiterGraphFilterFactory',\n" +
|
||||
" 'preserveOriginal':'0'\n" +
|
||||
" },\n" +
|
||||
" {\n" +
|
||||
|
|
|
@ -500,7 +500,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -518,10 +518,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -530,7 +531,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -540,16 +541,29 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -419,7 +419,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -437,10 +437,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -449,7 +450,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -459,16 +460,29 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -242,18 +242,19 @@
|
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -450,7 +451,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -468,10 +469,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -480,7 +482,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -490,16 +492,29 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -500,7 +500,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -518,10 +518,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -530,7 +531,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -540,16 +541,29 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -353,7 +353,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -362,13 +362,14 @@
|
|||
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -377,13 +378,23 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -145,27 +145,39 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -436,7 +436,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -455,10 +455,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -467,7 +468,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -478,16 +479,29 @@
|
|||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -435,7 +435,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -454,10 +454,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -466,7 +467,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -477,16 +478,29 @@
|
|||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -544,7 +544,7 @@
|
|||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
|
@ -562,10 +562,11 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -574,7 +575,7 @@
|
|||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -584,16 +585,29 @@
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjuncton with stemming. -->
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
|
|
@ -21,7 +21,7 @@ fooaaa,baraaa,bazaaa
|
|||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
|
|
|
@ -72,26 +72,41 @@
|
|||
<fieldtype name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldtype name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -191,9 +206,16 @@
|
|||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -265,15 +287,16 @@
|
|||
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -285,15 +308,16 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -304,8 +328,9 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -318,13 +343,14 @@
|
|||
<fieldtype name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -334,13 +360,14 @@
|
|||
<fieldtype name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -47,26 +47,41 @@
|
|||
<fieldType name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldType name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -166,9 +181,16 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -240,15 +262,16 @@
|
|||
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
|
@ -260,15 +283,16 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -279,8 +303,9 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -293,13 +318,14 @@
|
|||
<fieldType name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -309,13 +335,14 @@
|
|||
<fieldType name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
|
|
@ -73,24 +73,37 @@
|
|||
<fieldtype name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldtype name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
@ -179,9 +192,15 @@
|
|||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lowerpunctfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
@ -252,14 +271,15 @@
|
|||
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
|
@ -270,14 +290,15 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
|
@ -287,7 +308,8 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
|
@ -300,12 +322,13 @@
|
|||
<fieldtype name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
@ -314,12 +337,13 @@
|
|||
<fieldtype name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
|
|
@ -529,7 +529,7 @@ public class SchemaTest extends RestTestBase {
|
|||
tokenizerAttributes.put("class", "solr.WhitespaceTokenizerFactory");
|
||||
analyzerDefinition.setTokenizer(tokenizerAttributes);
|
||||
Map<String, Object> filterAttributes = new LinkedHashMap<>();
|
||||
filterAttributes.put("class", "solr.WordDelimiterFilterFactory");
|
||||
filterAttributes.put("class", "solr.WordDelimiterGraphFilterFactory");
|
||||
filterAttributes.put("preserveOriginal", "0");
|
||||
analyzerDefinition.setFilters(Collections.singletonList(filterAttributes));
|
||||
fieldTypeDefinition.setAnalyzer(analyzerDefinition);
|
||||
|
|
Loading…
Reference in New Issue