SOLR-13690: Migrate field type configurations in default/example schema files to look up factories by 'name'

This commit is contained in:
Tomoko Uchida 2019-08-31 23:06:32 +09:00
parent ea67d9c8c6
commit 77c1ed7d16
9 changed files with 1487 additions and 1487 deletions

View File

@ -64,7 +64,7 @@ Upgrade Notes
* SOLR-11266: default Content-Type override for JSONResponseWriter from _default configSet is removed. Example has been * SOLR-11266: default Content-Type override for JSONResponseWriter from _default configSet is removed. Example has been
provided in sample_techproducts_configs to override content-type. (Ishan Chattopadhyaya, Munendra S N, Gus Heck) provided in sample_techproducts_configs to override content-type. (Ishan Chattopadhyaya, Munendra S N, Gus Heck)
* SOLR-13593: Allow to look up analyzer components by their SPI names in field type configuration. (Tomoko Uchida) * SOLR-13593 SOLR-13690: Allow to look up analyzer components by their SPI names in field type configuration. (Tomoko Uchida)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -61,31 +61,31 @@
<fieldType name="text_en_splitting" class="solr.TextField" <fieldType name="text_en_splitting" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true"> positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. --> <!-- Case insensitive stop word removal. -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1"
catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -95,11 +95,11 @@
--> -->
<fieldType name="url_only" class="solr.TextField" positionIncrementGap="100"> <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> <tokenizer name="UAX29URLEmail" maxTokenLength="255"/>
<filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/> <filter name="type" types="url_types.txt" useWhitelist="true"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -389,7 +389,7 @@
<!-- A text field that only splits on whitespace for exact matching of words --> <!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -400,19 +400,19 @@
also applies synonyms. --> also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -423,39 +423,39 @@
also applies synonyms from synonyms.txt. --> also applies synonyms from synonyms.txt. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -470,33 +470,33 @@
--> -->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -504,29 +504,29 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -534,17 +534,17 @@
each token, to enable more efficient leading wildcard queries. --> each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -552,8 +552,8 @@
<!-- <!--
<fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
<analyzer> <analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <charFilter name="mapping" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
--> -->
@ -567,13 +567,13 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
<!-- The TrimFilter removes any leading or trailing whitespace --> <!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" /> <filter name="trim" />
<!-- The PatternReplaceFilter gives you the flexibility to use <!-- The PatternReplaceFilter gives you the flexibility to use
Java Regular expression to replace any sequence of characters Java Regular expression to replace any sequence of characters
matching a pattern with an arbitrary replacement string, matching a pattern with an arbitrary replacement string,
@ -585,7 +585,7 @@
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
--> -->
<filter class="solr.PatternReplaceFilterFactory" <filter name="patternReplace"
pattern="([^a-z])" replacement="" replace="all" pattern="([^a-z])" replacement="" replace="all"
/> />
</analyzer> </analyzer>
@ -593,14 +593,14 @@
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- <!--
The DelimitedPayloadTokenFilter can put payloads on tokens... for example, The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
@ -612,15 +612,15 @@
identity -> o.a.l.a.p.IdentityEncoder identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
--> -->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter name="delimitedPayload" encoder="float"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- lowercases the entire field value, keeping it as a single token. --> <!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -630,10 +630,10 @@
--> -->
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- <!--
@ -642,10 +642,10 @@
--> -->
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -701,124 +701,124 @@
<!-- Arabic --> <!-- Arabic -->
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- for any non-arabic --> <!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc --> <!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Bulgarian --> <!-- Bulgarian -->
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_bg.txt" />
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Catalan --> <!-- Catalan -->
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine --> <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- for any non-CJK --> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="cjkBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Kurdish --> <!-- Kurdish -->
<fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SoraniNormalizationFilterFactory"/> <filter name="soraniNormalization"/>
<!-- for any latin text --> <!-- for any latin text -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
<filter class="solr.SoraniStemFilterFactory"/> <filter name="soraniStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Czech --> <!-- Czech -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Danish --> <!-- Danish -->
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- German --> <!-- German -->
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="germanMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> <!-- more aggressive: <filter name="snowballPorter" language="German2"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Greek --> <!-- Greek -->
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- greek specific lowercase for sigma --> <!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Spanish --> <!-- Spanish -->
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Spanish"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Basque --> <!-- Basque -->
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -826,121 +826,121 @@
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<!-- for ZWNJ --> <!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Finnish --> <!-- Finnish -->
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="finnishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- French --> <!-- French -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="frenchMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> <!-- more aggressive: <filter name="snowballPorter" language="French"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Irish --> <!-- Irish -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes d', etc --> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! --> <!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Galician --> <!-- Galician -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="galicianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hindi --> <!-- Hindi -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<!-- normalizes unicode representation --> <!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hungarian --> <!-- Hungarian -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="hungarianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Armenian --> <!-- Armenian -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Indonesian --> <!-- Indonesian -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Italian --> <!-- Italian -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -979,20 +979,20 @@
See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
--> -->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!--<tokenizer name="japanese" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<!-- Removes tokens with certain part-of-speech tags --> <!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1017,49 +1017,49 @@
* decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words. * outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> -->
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags', <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
listing the tags to remove. By default it removes: listing the tags to remove. By default it removes:
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically an equivalent to stemming. This is basically an equivalent to stemming.
--> -->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Latvian --> <!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Dutch --> <!-- Dutch -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Norwegian --> <!-- Norwegian -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> <!-- less aggressive: <filter name="norwegianLightStem" variant="nb"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> --> <!-- singular/plural: <filter name="norwegianMinimalStem" variant="nb"/> -->
<!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1067,65 +1067,65 @@
<!-- Portuguese --> <!-- Portuguese -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="portugueseMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> <!-- most aggressive: <filter name="portugueseStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Romanian --> <!-- Romanian -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Russian --> <!-- Russian -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="russianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Swedish --> <!-- Swedish -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="swedishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Thai --> <!-- Thai -->
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Turkish --> <!-- Turkish -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ApostropheFilterFactory"/> <filter name="apostrophe"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -308,7 +308,7 @@
<!-- A text field that only splits on whitespace for exact matching of words --> <!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -319,19 +319,19 @@
also applies synonyms. --> also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -342,39 +342,39 @@
also applies synonyms from synonyms.txt. --> also applies synonyms from synonyms.txt. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -389,33 +389,33 @@
--> -->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -423,29 +423,29 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -453,17 +453,17 @@
each token, to enable more efficient leading wildcard queries. --> each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -471,8 +471,8 @@
<!-- <!--
<fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
<analyzer> <analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <charFilter name="mapping" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
--> -->
@ -486,13 +486,13 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
<!-- The TrimFilter removes any leading or trailing whitespace --> <!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" /> <filter name="trim" />
<!-- The PatternReplaceFilter gives you the flexibility to use <!-- The PatternReplaceFilter gives you the flexibility to use
Java Regular expression to replace any sequence of characters Java Regular expression to replace any sequence of characters
matching a pattern with an arbitrary replacement string, matching a pattern with an arbitrary replacement string,
@ -504,7 +504,7 @@
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
--> -->
<filter class="solr.PatternReplaceFilterFactory" <filter name="patternReplace"
pattern="([^a-z])" replacement="" replace="all" pattern="([^a-z])" replacement="" replace="all"
/> />
</analyzer> </analyzer>
@ -512,14 +512,14 @@
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- <!--
The DelimitedPayloadTokenFilter can put payloads on tokens... for example, The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
@ -531,15 +531,15 @@
identity -> o.a.l.a.p.IdentityEncoder identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
--> -->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter name="delimitedPayload" encoder="float"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- lowercases the entire field value, keeping it as a single token. --> <!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -549,10 +549,10 @@
--> -->
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- <!--
@ -561,10 +561,10 @@
--> -->
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -620,124 +620,124 @@
<!-- Arabic --> <!-- Arabic -->
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- for any non-arabic --> <!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc --> <!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Bulgarian --> <!-- Bulgarian -->
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_bg.txt" />
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Catalan --> <!-- Catalan -->
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine --> <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- for any non-CJK --> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="cjkBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Kurdish --> <!-- Kurdish -->
<fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SoraniNormalizationFilterFactory"/> <filter name="soraniNormalization"/>
<!-- for any latin text --> <!-- for any latin text -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
<filter class="solr.SoraniStemFilterFactory"/> <filter name="soraniStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Czech --> <!-- Czech -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Danish --> <!-- Danish -->
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- German --> <!-- German -->
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="germanMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> <!-- more aggressive: <filter name="snowballPorter" language="German2"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Greek --> <!-- Greek -->
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- greek specific lowercase for sigma --> <!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Spanish --> <!-- Spanish -->
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Spanish"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Basque --> <!-- Basque -->
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -745,121 +745,121 @@
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<!-- for ZWNJ --> <!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Finnish --> <!-- Finnish -->
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="finnishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- French --> <!-- French -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="frenchMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> <!-- more aggressive: <filter name="snowballPorter" language="French"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Irish --> <!-- Irish -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes d', etc --> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! --> <!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Galician --> <!-- Galician -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="galicianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hindi --> <!-- Hindi -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<!-- normalizes unicode representation --> <!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hungarian --> <!-- Hungarian -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="hungarianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Armenian --> <!-- Armenian -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Indonesian --> <!-- Indonesian -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Italian --> <!-- Italian -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -898,20 +898,20 @@
See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
--> -->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!--<tokenizer name="japanese" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<!-- Removes tokens with certain part-of-speech tags --> <!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -936,115 +936,115 @@
* decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words. * outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> -->
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags', <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
listing the tags to remove. By default it removes: listing the tags to remove. By default it removes:
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically an equivalent to stemming. This is basically an equivalent to stemming.
--> -->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Latvian --> <!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Dutch --> <!-- Dutch -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Norwegian --> <!-- Norwegian -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> <!-- less aggressive: <filter name="norwegianLightStem" variant="nb"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> --> <!-- singular/plural: <filter name="norwegianMinimalStem" variant="nb"/> -->
<!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Portuguese --> <!-- Portuguese -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="portugueseMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> <!-- most aggressive: <filter name="portugueseStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Romanian --> <!-- Romanian -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Russian --> <!-- Russian -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="russianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Swedish --> <!-- Swedish -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="swedishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Thai --> <!-- Thai -->
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Turkish --> <!-- Turkish -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ApostropheFilterFactory"/> <filter name="apostrophe"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -389,7 +389,7 @@
<!-- A text field that only splits on whitespace for exact matching of words --> <!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -400,19 +400,19 @@
also applies synonyms. --> also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -423,39 +423,39 @@
also applies synonyms from synonyms.txt. --> also applies synonyms from synonyms.txt. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -470,33 +470,33 @@
--> -->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -504,29 +504,29 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -534,17 +534,17 @@
each token, to enable more efficient leading wildcard queries. --> each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -552,8 +552,8 @@
<!-- <!--
<fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
<analyzer> <analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <charFilter name="mapping" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
--> -->
@ -567,13 +567,13 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be useful <!-- The LowerCase TokenFilter does what you expect, which can be useful
when you want your sorting to be case insensitive when you want your sorting to be case insensitive
--> -->
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
<!-- The TrimFilter removes any leading or trailing whitespace --> <!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" /> <filter name="trim" />
<!-- The PatternReplaceFilter gives you the flexibility to use <!-- The PatternReplaceFilter gives you the flexibility to use
Java Regular expression to replace any sequence of characters Java Regular expression to replace any sequence of characters
matching a pattern with an arbitrary replacement string, matching a pattern with an arbitrary replacement string,
@ -585,7 +585,7 @@
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
--> -->
<filter class="solr.PatternReplaceFilterFactory" <filter name="patternReplace"
pattern="([^a-z])" replacement="" replace="all" pattern="([^a-z])" replacement="" replace="all"
/> />
</analyzer> </analyzer>
@ -593,14 +593,14 @@
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- <!--
The DelimitedPayloadTokenFilter can put payloads on tokens... for example, The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
@ -612,15 +612,15 @@
identity -> o.a.l.a.p.IdentityEncoder identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
--> -->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter name="delimitedPayload" encoder="float"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- lowercases the entire field value, keeping it as a single token. --> <!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -630,10 +630,10 @@
--> -->
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- <!--
@ -642,10 +642,10 @@
--> -->
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -701,124 +701,124 @@
<!-- Arabic --> <!-- Arabic -->
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- for any non-arabic --> <!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc --> <!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Bulgarian --> <!-- Bulgarian -->
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_bg.txt" />
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Catalan --> <!-- Catalan -->
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine --> <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- for any non-CJK --> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="cjkBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Kurdish --> <!-- Kurdish -->
<fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SoraniNormalizationFilterFactory"/> <filter name="soraniNormalization"/>
<!-- for any latin text --> <!-- for any latin text -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
<filter class="solr.SoraniStemFilterFactory"/> <filter name="soraniStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Czech: standard tokenizer, lowercasing, stop-word removal, then Czech stemming --> <!-- Czech: standard tokenizer, lowercasing, stop-word removal, then Czech stemming -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Danish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Danish" --> <!-- Danish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Danish" -->
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- German: standard tokenizer, lowercasing, stop list read in snowball format, German normalization, then light stemming. Alternative stemmers are listed as commented-out lines at the end of the chain. --> <!-- German: standard tokenizer, lowercasing, stop list read in snowball format, German normalization, then light stemming. Alternative stemmers are listed as commented-out lines at the end of the chain. -->
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="germanMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> <!-- more aggressive: <filter name="snowballPorter" language="German2"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Greek: standard tokenizer, Greek-specific lowercasing, stop-word removal, then Greek stemming --> <!-- Greek: standard tokenizer, Greek-specific lowercasing, stop-word removal, then Greek stemming -->
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- greek specific lowercase for sigma --> <!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<!-- NOTE(review): ignoreCase is "false" on this stop filter; presumably that is because tokens were already lowercased by the filter above. Confirm. --> <!-- NOTE(review): ignoreCase is "false" on this stop filter; presumably that is because tokens were already lowercased by the filter above. Confirm. -->
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Spanish: standard tokenizer, lowercasing, stop list read in snowball format, then light stemming (a Snowball alternative is listed as a commented-out line) --> <!-- Spanish: standard tokenizer, lowercasing, stop list read in snowball format, then light stemming (a Snowball alternative is listed as a commented-out line) -->
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Spanish"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Basque: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Basque" --> <!-- Basque: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Basque" -->
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -826,121 +826,121 @@
<!-- text_fa: Persian char filter, standard tokenizer, lowercasing, Arabic normalization, Persian normalization, then stop-word removal. No stemmer is configured in this chain. --> <!-- text_fa: Persian char filter, standard tokenizer, lowercasing, Arabic normalization, Persian normalization, then stop-word removal. No stemmer is configured in this chain. -->
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<!-- for ZWNJ --> <!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Finnish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Finnish" (a light stemmer is listed as a commented-out alternative) --> <!-- Finnish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Finnish" (a light stemmer is listed as a commented-out alternative) -->
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="finnishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- French: standard tokenizer, elision removal using lang/contractions_fr.txt, lowercasing, stop list read in snowball format, then light stemming (alternatives are listed as commented-out lines) --> <!-- French: standard tokenizer, elision removal using lang/contractions_fr.txt, lowercasing, stop list read in snowball format, then light stemming (alternatives are listed as commented-out lines) -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="frenchMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> <!-- more aggressive: <filter name="snowballPorter" language="French"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Irish: standard tokenizer, elision removal, a first stop filter fed from lang/hyphenations_ga.txt, Irish lowercasing, a second stop filter fed from lang/stopwords_ga.txt, then Snowball stemming with language="Irish" --> <!-- Irish: standard tokenizer, elision removal, a first stop filter fed from lang/hyphenations_ga.txt, Irish lowercasing, a second stop filter fed from lang/stopwords_ga.txt, then Snowball stemming with language="Irish" -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes d', etc --> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- NOTE(review): the next comment says position increments are "intentionally false", but no position-increment attribute is set on the filter that follows; confirm whether the remark is still accurate. --> <!-- NOTE(review): the next comment says position increments are "intentionally false", but no position-increment attribute is set on the filter that follows; confirm whether the remark is still accurate. -->
<!-- removes n-, etc. position increments is intentionally false! --> <!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Galician: standard tokenizer, lowercasing, stop-word removal, then Galician stemming (a minimal stemmer is listed as a commented-out alternative) --> <!-- Galician: standard tokenizer, lowercasing, stop-word removal, then Galician stemming (a minimal stemmer is listed as a commented-out alternative) -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="galicianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hindi: standard tokenizer, lowercasing, Indic normalization, Hindi normalization, stop-word removal, then Hindi stemming --> <!-- Hindi: standard tokenizer, lowercasing, Indic normalization, Hindi normalization, stop-word removal, then Hindi stemming -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<!-- normalizes unicode representation --> <!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hungarian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Hungarian" (a light stemmer is listed as a commented-out alternative) --> <!-- Hungarian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Hungarian" (a light stemmer is listed as a commented-out alternative) -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="hungarianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Armenian: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Armenian" --> <!-- Armenian: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Armenian" -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Indonesian: standard tokenizer, lowercasing, stop-word removal, then Indonesian stemming with stemDerivational="true" --> <!-- Indonesian: standard tokenizer, lowercasing, stop-word removal, then Indonesian stemming with stemDerivational="true" -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Italian: standard tokenizer, elision removal using lang/contractions_it.txt, lowercasing, stop list read in snowball format, then light stemming (a Snowball alternative is listed as a commented-out line) --> <!-- Italian: standard tokenizer, elision removal using lang/contractions_it.txt, lowercasing, stop list read in snowball format, then light stemming (a Snowball alternative is listed as a commented-out line) -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -979,20 +979,20 @@
See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
--> -->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!--<tokenizer name="japanese" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<!-- Removes tokens with certain part-of-speech tags --> <!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking --> <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1017,49 +1017,49 @@
* decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words. * outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> -->
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes tokens with certain part-of-speech tags, such as EOMI (Pos.E). You can add a 'tags' parameter <!-- Removes tokens with certain part-of-speech tags, such as EOMI (Pos.E). You can add a 'tags' parameter
listing the tags to remove. By default it removes: listing the tags to remove. By default it removes:
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically equivalent to stemming. This is basically equivalent to stemming.
--> -->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Latvian: standard tokenizer, lowercasing, stop-word removal, then Latvian stemming --> <!-- Latvian: standard tokenizer, lowercasing, stop-word removal, then Latvian stemming -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Dutch: standard tokenizer, lowercasing, stop list read in snowball format, stemmer override from lang/stemdict_nl.txt, then Snowball stemming with language="Dutch" --> <!-- Dutch: standard tokenizer, lowercasing, stop list read in snowball format, stemmer override from lang/stemdict_nl.txt, then Snowball stemming with language="Dutch" -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<!-- NOTE(review): the override filter sits before the Snowball stemmer; presumably dictionary entries take precedence over algorithmic stemming. Confirm against the filter documentation. --> <!-- NOTE(review): the override filter sits before the Snowball stemmer; presumably dictionary entries take precedence over algorithmic stemming. Confirm against the filter documentation. -->
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Norwegian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Norwegian" (light and minimal stemmers are listed as commented-out alternatives) --> <!-- Norwegian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Norwegian" (light and minimal stemmers are listed as commented-out alternatives) -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> <!-- less aggressive: <filter name="norwegianLightStem" variant="nb"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> --> <!-- singular/plural: <filter name="norwegianMinimalStem" variant="nb"/> -->
<!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1067,65 +1067,65 @@
<!-- Portuguese: standard tokenizer, lowercasing, stop list read in snowball format, then light stemming (three alternative stemmers are listed as commented-out lines) --> <!-- Portuguese: standard tokenizer, lowercasing, stop list read in snowball format, then light stemming (three alternative stemmers are listed as commented-out lines) -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="portugueseMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> <!-- most aggressive: <filter name="portugueseStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Romanian: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Romanian" --> <!-- Romanian: standard tokenizer, lowercasing, stop-word removal, then Snowball stemming with language="Romanian" -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Russian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Russian" (a light stemmer is listed as a commented-out alternative) --> <!-- Russian: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Russian" (a light stemmer is listed as a commented-out alternative) -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="russianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Swedish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Swedish" (a light stemmer is listed as a commented-out alternative) --> <!-- Swedish: standard tokenizer, lowercasing, stop list read in snowball format, then Snowball stemming with language="Swedish" (a light stemmer is listed as a commented-out alternative) -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="swedishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Thai: Thai tokenizer (not the standard tokenizer), lowercasing, then stop-word removal; no stemmer is configured --> <!-- Thai: Thai tokenizer (not the standard tokenizer), lowercasing, then stop-word removal; no stemmer is configured -->
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Turkish: standard tokenizer, apostrophe filter, Turkish lowercasing, stop-word removal, then Snowball stemming with language="Turkish" --> <!-- Turkish: standard tokenizer, apostrophe filter, Turkish lowercasing, stop-word removal, then Snowball stemming with language="Turkish" -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ApostropheFilterFactory"/> <filter name="apostrophe"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<!-- NOTE(review): ignoreCase is "false" on this stop filter; presumably that is because tokens were already lowercased by the Turkish lowercase filter above. Confirm. --> <!-- NOTE(review): ignoreCase is "false" on this stop filter; presumably that is because tokens were already lowercased by the Turkish lowercase filter above. Confirm. -->
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -46,8 +46,8 @@
--> -->
<!-- text_simple: a single analyzer with a standard tokenizer followed by lowercasing; no stop-word or stemming filter is configured --> <!-- text_simple: a single analyzer with a standard tokenizer followed by lowercasing; no stop-word or stemming filter is configured -->
<fieldType name="text_simple" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_simple" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -4,10 +4,10 @@
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<!-- ancestor_path: separate index and query analyzers. The index side uses the keyword tokenizer; the query side uses the path-hierarchy tokenizer with "/" as delimiter. --> <!-- ancestor_path: separate index and query analyzers. The index side uses the keyword tokenizer; the query side uses the path-hierarchy tokenizer with "/" as delimiter. -->
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> <tokenizer name="pathHierarchy" delimiter="/"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- binary: field type implemented by solr.BinaryField; no further attributes are set --> <!-- binary: field type implemented by solr.BinaryField; no further attributes are set -->
<fieldType name="binary" class="solr.BinaryField"/> <fieldType name="binary" class="solr.BinaryField"/>
@ -16,10 +16,10 @@
<!-- currency: solr.CurrencyFieldType with USD as the default currency, currency.xml as its configuration file, and _l_ns / _s_ns as the amount and code suffixes --> <!-- currency: solr.CurrencyFieldType with USD as the default currency, currency.xml as its configuration file, and _l_ns / _s_ns as the amount and code suffixes -->
<fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" /> <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" defaultCurrency="USD" currencyConfig="currency.xml" />
<!-- descendent_path: separate index and query analyzers. The index side uses the path-hierarchy tokenizer with "/" as delimiter; the query side uses the keyword tokenizer. --> <!-- descendent_path: separate index and query analyzers. The index side uses the path-hierarchy tokenizer with "/" as delimiter; the query side uses the keyword tokenizer. -->
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/> <tokenizer name="pathHierarchy" delimiter="/"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- ignored: multi-valued solr.StrField that is declared neither indexed nor stored --> <!-- ignored: multi-valued solr.StrField that is declared neither indexed nor stored -->
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/> <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
@ -27,14 +27,14 @@
<!-- location_rpt: solr.SpatialRecursivePrefixTreeFieldType with geo="true", distances in kilometers, maxDistErr 0.001 and distErrPct 0.025 --> <!-- location_rpt: solr.SpatialRecursivePrefixTreeFieldType with geo="true", distances in kilometers, maxDistErr 0.001 and distErrPct 0.025 -->
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/> <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" geo="true" maxDistErr="0.001" distErrPct="0.025" distanceUnits="kilometers"/>
<!-- lowercase: keyword tokenizer followed by the lowercase filter. Note that the field type and the filter share the name "lowercase". --> <!-- lowercase: keyword tokenizer followed by the lowercase filter. Note that the field type and the filter share the name "lowercase". -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- phonetic_en: standard tokenizer followed by the Double Metaphone filter with inject="false"; the type is declared indexed but not stored --> <!-- phonetic_en: standard tokenizer followed by the Double Metaphone filter with inject="false"; the type is declared indexed but not stored -->
<fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false"> <fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- pdate: field type implemented by solr.DatePointField with docValues enabled --> <!-- pdate: field type implemented by solr.DatePointField with docValues enabled -->
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
@ -53,386 +53,386 @@
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/> <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/>
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_ar.txt" ignoreCase="true"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_bg.txt" ignoreCase="true"/>
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/> <filter name="elision" articles="lang/contractions_ca.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_ca.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="cjkBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_cz.txt" ignoreCase="true"/>
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_da.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_de.txt" ignoreCase="true"/>
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/> <filter name="stop" words="lang/stopwords_el.txt" ignoreCase="false"/>
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/> <filter name="wordDelimiterGraph" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/> <filter name="wordDelimiterGraph" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100"> <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> <filter name="wordDelimiterGraph" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/> <filter name="wordDelimiterGraph" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_es.txt" ignoreCase="true"/>
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_eu.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_fa.txt" ignoreCase="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_fi.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/> <filter name="elision" articles="lang/contractions_fr.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_fr.txt" ignoreCase="true"/>
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/> <filter name="elision" articles="lang/contractions_ga.txt" ignoreCase="true"/>
<filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/> <filter name="stop" words="lang/hyphenations_ga.txt" ignoreCase="true"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_ga.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <filter name="stop" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <filter name="stop" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <filter name="stop" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/> <filter name="reversedWildcard" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/> <filter name="synonymGraph" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/> <filter name="stop" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_gl.txt" ignoreCase="true"/>
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_hi.txt" ignoreCase="true"/>
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_hu.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_hy.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_hy.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_id.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_id.txt" ignoreCase="true"/>
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt" ignoreCase="true"/> <filter name="elision" articles="lang/contractions_it.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_it.txt" ignoreCase="true"/>
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100"> <fieldType name="text_ja" class="solr.TextField" autoGeneratePhraseQueries="false" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt"/>
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_ja.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_ja.txt" ignoreCase="true"/>
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_lv.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_lv.txt" ignoreCase="true"/>
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_nl.txt" ignoreCase="true"/>
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_no.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_pt.txt" ignoreCase="true"/>
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_ro.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_ro.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_ru.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/> <filter name="stop" format="snowball" words="lang/stopwords_sv.txt" ignoreCase="true"/>
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_th.txt" ignoreCase="true"/> <filter name="stop" words="lang/stopwords_th.txt" ignoreCase="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<filter class="solr.StopFilterFactory" words="lang/stopwords_tr.txt" ignoreCase="false"/> <filter name="stop" words="lang/stopwords_tr.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_email_url" class="solr.TextField"> <fieldType name="text_email_url" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> <tokenizer name="UAX29URLEmail"/>
<filter class="solr.TypeTokenFilterFactory" types="email_url_types.txt" useWhitelist="true"/> <filter name="type" types="email_url_types.txt" useWhitelist="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true"> <fieldType name="text_shingles" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="false" /> --> <!-- <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="false" /> -->
<filter class="solr.LengthFilterFactory" min="2" max="18"/> <filter name="length" min="2" max="18"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="(^[^a-z]+$)" replacement="" replace="all"/> <filter name="patternReplace" pattern="(^[^a-z]+$)" replacement="" replace="all"/>
<filter class="solr.ShingleFilterFactory" minShingleSize="3" maxShingleSize="3" <filter name="shingle" minShingleSize="3" maxShingleSize="3"
outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/> outputUnigrams="false" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="*"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="(.*[\*].*)" replacement=""/> <filter name="patternReplace" pattern="(.*[\*].*)" replacement=""/>
<filter class="solr.TrimFilterFactory"/> <filter name="trim"/>
<!-- PRFF could have removed everything down to an empty string, remove if so --> <!-- PRFF could have removed everything down to an empty string, remove if so -->
<filter class="solr.LengthFilterFactory" min="1" max="100"/> <filter name="length" min="1" max="100"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -275,7 +275,7 @@
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -287,19 +287,19 @@
--> -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -316,15 +316,15 @@
<dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/> <dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/>
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -334,39 +334,39 @@
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -382,33 +382,33 @@
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -417,29 +417,29 @@
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -449,25 +449,25 @@
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -475,8 +475,8 @@
<dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/>
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -487,10 +487,10 @@
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -501,10 +501,10 @@
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -535,20 +535,20 @@
<!-- Payloaded field types --> <!-- Payloaded field types -->
<fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField"> <fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter name="delimitedPayload" encoder="float"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField"> <fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/> <filter name="delimitedPayload" encoder="integer"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField"> <fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/> <filter name="delimitedPayload" encoder="identity"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -558,13 +558,13 @@
<dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/>
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- for any non-arabic --> <!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc --> <!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -572,10 +572,10 @@
<dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/>
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_bg.txt" />
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -583,12 +583,12 @@
<dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/>
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -596,12 +596,12 @@
<dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/>
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine --> <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="CJKWidth"/>
<!-- for any non-CJK --> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="CJKBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -609,10 +609,10 @@
<dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/>
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -620,10 +620,10 @@
<dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/>
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -631,13 +631,13 @@
<dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/>
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="germanMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> <!-- more aggressive: <filter name="snowballPorter" language="German2"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -645,11 +645,11 @@
<dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/> <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/>
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- greek specific lowercase for sigma --> <!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -657,11 +657,11 @@
<dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/>
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Spanish"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -669,10 +669,10 @@
<dynamicField name="*_txt_et" type="text_et" indexed="true" stored="true"/> <dynamicField name="*_txt_et" type="text_et" indexed="true" stored="true"/>
<fieldType name="text_et" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_et" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_et.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_et.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Estonian"/> <filter name="snowballPorter" language="Estonian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -680,10 +680,10 @@
<dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/>
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -692,12 +692,12 @@
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<!-- for ZWNJ --> <!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -705,11 +705,11 @@
<dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/>
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="finnishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -717,14 +717,14 @@
<dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/>
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="frenchMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> <!-- more aggressive: <filter name="snowballPorter" language="French"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -732,14 +732,14 @@
<dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/>
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes d', etc --> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! --> <!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -747,11 +747,11 @@
<dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/>
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="galicianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -759,14 +759,14 @@
<dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/>
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<!-- normalizes unicode representation --> <!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -774,11 +774,11 @@
<dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/>
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="hungarianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -786,10 +786,10 @@
<dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/>
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -797,11 +797,11 @@
<dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -809,13 +809,13 @@
<dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -853,20 +853,20 @@
Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
--> -->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!--<tokenizer name="japanese" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<!-- Removes tokens with certain part-of-speech tags --> <!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -891,16 +891,16 @@
* decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words. * outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> -->
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags', <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
listing the tags to remove. By default it removes: listing the tags to remove. By default it removes:
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically an equivalent to stemming. This is basically an equivalent to stemming.
--> -->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -908,10 +908,10 @@
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -919,11 +919,11 @@
<dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -931,12 +931,12 @@
<dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="norwegianLightStem"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> <!-- singular/plural: <filter name="norwegianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -944,13 +944,13 @@
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/>
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="portugueseMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> <!-- most aggressive: <filter name="portugueseStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -958,10 +958,10 @@
<dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/>
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -969,11 +969,11 @@
<dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/>
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="russianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -981,11 +981,11 @@
<dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/>
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="swedishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -993,9 +993,9 @@
<dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/>
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1003,10 +1003,10 @@
<dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>

View File

@ -382,22 +382,22 @@
<!-- A text field that only splits on whitespace for exact matching of words --> <!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- A text type for English text where stopwords and synonyms are managed using the REST API --> <!-- A text type for English text where stopwords and synonyms are managed using the REST API -->
<fieldType name="managed_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="managed_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ManagedStopFilterFactory" managed="english" /> <filter name="managedStop" managed="english" />
<filter class="solr.ManagedSynonymGraphFilterFactory" managed="english" /> <filter name="managedSynonymGraph" managed="english" />
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ManagedStopFilterFactory" managed="english" /> <filter name="managedStop" managed="english" />
<filter class="solr.ManagedSynonymGraphFilterFactory" managed="english" /> <filter name="managedSynonymGraph" managed="english" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -408,19 +408,19 @@
also applies synonyms. --> also applies synonyms. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -434,15 +434,15 @@
--> -->
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -453,39 +453,39 @@
also applies synonyms from synonyms.txt. --> also applies synonyms from synonyms.txt. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/> <filter name="flattenGraph"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.EnglishPossessiveFilterFactory"/> <filter name="englishPossessive"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
--> -->
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -500,33 +500,33 @@
--> -->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
--> -->
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter name="stop"
ignoreCase="true" ignoreCase="true"
words="lang/stopwords_en.txt" words="lang/stopwords_en.txt"
/> />
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/> <filter name="porterStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -534,29 +534,29 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
<filter class="solr.FlattenGraphFilterFactory" /> <filter name="flattenGraph" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter name="wordDelimiterGraph" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter name="keywordMarker" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/> <filter name="englishMinimalStem"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjunction with stemming. --> possible with WordDelimiterGraphFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter name="removeDuplicates"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -564,17 +564,17 @@
each token, to enable more efficient leading wildcard queries. --> each token, to enable more efficient leading wildcard queries. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" <filter name="reversedWildcard" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter name="synonymGraph" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> <filter name="stop" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -582,8 +582,8 @@
<!-- <!--
<fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
<analyzer> <analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> <charFilter name="mapping" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
--> -->
@ -597,13 +597,13 @@
<!-- KeywordTokenizer does no actual tokenizing, so the entire <!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token input string is preserved as a single token
--> -->
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be <!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive useful when you want your sorting to be case insensitive
--> -->
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
<!-- The TrimFilter removes any leading or trailing whitespace --> <!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" /> <filter name="trim" />
<!-- The PatternReplaceFilter gives you the flexibility to use <!-- The PatternReplaceFilter gives you the flexibility to use
Java regular expressions to replace any sequence of characters Java regular expressions to replace any sequence of characters
matching a pattern with an arbitrary replacement string, matching a pattern with an arbitrary replacement string,
@ -615,7 +615,7 @@
http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
--> -->
<filter class="solr.PatternReplaceFilterFactory" <filter name="patternReplace"
pattern="([^a-z])" replacement="" replace="all" pattern="([^a-z])" replacement="" replace="all"
/> />
</analyzer> </analyzer>
@ -623,14 +623,14 @@
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter name="doubleMetaphone" inject="false"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
<!-- <!--
The DelimitedPayloadTokenFilter can put payloads on tokens... for example, The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
@ -642,15 +642,15 @@
identity -> o.a.l.a.p.IdentityEncoder identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
--> -->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter name="delimitedPayload" encoder="float"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- lowercases the entire field value, keeping it as a single token. --> <!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/> <tokenizer name="keyword"/>
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -660,10 +660,10 @@
--> -->
<fieldType name="descendent_path" class="solr.TextField"> <fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- <!--
@ -672,10 +672,10 @@
--> -->
<fieldType name="ancestor_path" class="solr.TextField"> <fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" /> <tokenizer name="keyword" />
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> <tokenizer name="pathHierarchy" delimiter="/" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -737,134 +737,134 @@
<!-- Arabic --> <!-- Arabic -->
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- for any non-arabic --> <!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc --> <!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.ArabicStemFilterFactory"/> <filter name="arabicStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Bulgarian --> <!-- Bulgarian -->
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_bg.txt" />
<filter class="solr.BulgarianStemFilterFactory"/> <filter name="bulgarianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Catalan --> <!-- Catalan -->
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> <filter name="snowballPorter" language="Catalan"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine --> <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- for any non-CJK --> <!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.CJKBigramFilterFactory"/> <filter name="cjkBigram"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Kurdish --> <!-- Kurdish -->
<fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.SoraniNormalizationFilterFactory"/> <filter name="soraniNormalization"/>
<!-- for any latin text --> <!-- for any latin text -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
<filter class="solr.SoraniStemFilterFactory"/> <filter name="soraniStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Czech --> <!-- Czech -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/> <filter name="czechStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Danish --> <!-- Danish -->
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/> <filter name="snowballPorter" language="Danish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- German --> <!-- German -->
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/> <filter name="germanNormalization"/>
<filter class="solr.GermanLightStemFilterFactory"/> <filter name="germanLightStem"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="germanMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> <!-- more aggressive: <filter name="snowballPorter" language="German2"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Greek --> <!-- Greek -->
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- greek specific lowercase for sigma --> <!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/> <filter name="greekLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/> <filter name="greekStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Spanish --> <!-- Spanish -->
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/> <filter name="spanishLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Spanish"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Estonian --> <!-- Estonian -->
<fieldType name="text_et" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_et" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_et.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_et.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Estonian"/> <filter name="snowballPorter" language="Estonian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Basque --> <!-- Basque -->
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/> <filter name="snowballPorter" language="Basque"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -872,121 +872,121 @@
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<!-- for ZWNJ --> <!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/> <charFilter name="persian"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.ArabicNormalizationFilterFactory"/> <filter name="arabicNormalization"/>
<filter class="solr.PersianNormalizationFilterFactory"/> <filter name="persianNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Finnish --> <!-- Finnish -->
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> <filter name="snowballPorter" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="finnishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- French --> <!-- French -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/> <filter name="frenchLightStem"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="frenchMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> <!-- more aggressive: <filter name="snowballPorter" language="French"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Irish --> <!-- Irish -->
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes d', etc --> <!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! --> <!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/> <filter name="irishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> <filter name="stop" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/> <filter name="snowballPorter" language="Irish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Galician --> <!-- Galician -->
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/> <filter name="galicianStem"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="galicianMinimalStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hindi --> <!-- Hindi -->
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<!-- normalizes unicode representation --> <!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/> <filter name="indicNormalization"/>
<!-- normalizes variation in spelling --> <!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/> <filter name="hindiNormalization"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/> <filter name="hindiStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Hungarian --> <!-- Hungarian -->
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> <filter name="snowballPorter" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="hungarianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Armenian --> <!-- Armenian -->
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> <filter name="snowballPorter" language="Armenian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Indonesian --> <!-- Indonesian -->
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> <filter name="indonesianStem" stemDerivational="true"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Italian --> <!-- Italian -->
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<!-- removes l', etc --> <!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> <filter name="elision" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/> <filter name="italianLightStem"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Italian"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1025,20 +1025,20 @@
See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
--> -->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> <tokenizer name="japanese" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> <!--<tokenizer name="japanese" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/> <filter name="japaneseBaseForm"/>
<!-- Removes tokens with certain part-of-speech tags --> <!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> <filter name="japanesePartOfSpeechStop" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/> <filter name="cjkWidth"/>
<!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> <filter name="japaneseKatakanaStem" minimumLength="4"/>
<!-- Lower-cases romaji characters --> <!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1063,49 +1063,49 @@
* decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'. * decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'.
* outputUnknownUnigrams: If true outputs unigrams for unknown words. * outputUnknownUnigrams: If true outputs unigrams for unknown words.
--> -->
<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> <tokenizer name="korean" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes tokens with certain part-of-speech tags, such as EOMI (Pos.E). You can add a 'tags' parameter <!-- Removes tokens with certain part-of-speech tags, such as EOMI (Pos.E). You can add a 'tags' parameter
listing the tags to remove. By default it removes: listing the tags to remove. By default it removes:
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically equivalent to stemming. This is basically equivalent to stemming.
--> -->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> <filter name="koreanPartOfSpeechStop" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" /> <filter name="koreanReadingForm" />
<filter class="solr.LowerCaseFilterFactory" /> <filter name="lowercase" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Latvian --> <!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/> <filter name="latvianStem"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Dutch --> <!-- Dutch -->
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> <filter name="stemmerOverride" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> <filter name="snowballPorter" language="Dutch"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Norwegian --> <!-- Norwegian -->
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> <filter name="snowballPorter" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> <!-- less aggressive: <filter name="norwegianLightStem" variant="nb"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> --> <!-- singular/plural: <filter name="norwegianMinimalStem" variant="nb"/> -->
<!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1113,65 +1113,65 @@
<!-- Portuguese --> <!-- Portuguese -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/> <filter name="portugueseLightStem"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> <!-- less aggressive: <filter name="portugueseMinimalStem"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> <!-- more aggressive: <filter name="snowballPorter" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> <!-- most aggressive: <filter name="portugueseStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Romanian --> <!-- Romanian -->
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> <filter name="snowballPorter" language="Romanian"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Russian --> <!-- Russian -->
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/> <filter name="snowballPorter" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="russianLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Swedish --> <!-- Swedish -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> <filter name="snowballPorter" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> <!-- less aggressive: <filter name="swedishLightStem"/> -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Thai --> <!-- Thai -->
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/> <tokenizer name="thai"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter name="lowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> <filter name="stop" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- Turkish --> <!-- Turkish -->
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer name="standard"/>
<filter class="solr.ApostropheFilterFactory"/> <filter name="apostrophe"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/> <filter name="turkishLowercase"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> <filter name="stop" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> <filter name="snowballPorter" language="Turkish"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -1179,7 +1179,7 @@
<fieldType name="preanalyzed" class="solr.PreAnalyzedField"> <fieldType name="preanalyzed" class="solr.PreAnalyzedField">
<!-- PreAnalyzedField's builtin index analyzer just decodes the pre-analyzed token stream. --> <!-- PreAnalyzedField's builtin index analyzer just decodes the pre-analyzed token stream. -->
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer name="whitespace"/>
</analyzer> </analyzer>
</fieldType> </fieldType>