mirror of https://github.com/apache/lucene.git
Cleanup the data_driven configset
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1718294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6b2097090c
commit
72501bd0bf
|
@ -249,6 +249,9 @@ Other Changes
|
|||
are enabled by default and the schema is mutable. The schema file will be called managed-schema
|
||||
(Uwe Schindler, shalin, Varun Thacker)
|
||||
|
||||
* SOLR-8381: Cleanup data_driven managed-schema and solrconfig.xml files. Commented out copyFields are removed
|
||||
and solrconfig.xml doesn't refer to field which are not defined.
|
||||
|
||||
================== 5.4.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
-->
|
||||
|
||||
<schema name="example-data-driven-schema" version="1.5">
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
version="x.y" is Solr's version number for the schema syntax and
|
||||
semantics. It should not normally be changed by applications.
|
||||
|
||||
|
@ -61,7 +61,7 @@
|
|||
to off for version >= 1.4
|
||||
1.5: omitNorms defaults to true for primitive field types
|
||||
(int, float, boolean, string...)
|
||||
-->
|
||||
-->
|
||||
|
||||
<!-- Valid attributes for fields:
|
||||
name: mandatory - the name for the field
|
||||
|
@ -96,14 +96,14 @@
|
|||
value does not exist
|
||||
default: a value that should be used if no value is specified
|
||||
when adding a document.
|
||||
-->
|
||||
-->
|
||||
|
||||
<!-- field names should consist of alphanumeric or underscore characters only and
|
||||
not start with a digit. This is not currently strictly enforced,
|
||||
but other field names will not have first class support from all components
|
||||
and back compatibility is not guaranteed. Names with both leading and
|
||||
trailing underscores (e.g. _version_) are reserved.
|
||||
-->
|
||||
-->
|
||||
|
||||
<!-- In this data_driven_schema_configs configset, only three fields are pre-declared:
|
||||
id, _version_, and _text_. All other fields will be type guessed and added via the
|
||||
|
@ -113,9 +113,9 @@
|
|||
Note that many dynamic fields are also defined - you can use them to specify a
|
||||
field's type via field naming conventions - see below.
|
||||
|
||||
WARNING: The _text_ catch-all field will significantly increase your index size.
|
||||
If you don't need it, consider removing it and the corresponding copyField directive.
|
||||
-->
|
||||
WARNING: The _text_ catch-all field will significantly increase your index size.
|
||||
If you don't need it, consider removing it and the corresponding copyField directive.
|
||||
-->
|
||||
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
|
||||
<field name="_version_" type="long" indexed="true" stored="true"/>
|
||||
<field name="_root_" type="string" indexed="true" stored="false"/>
|
||||
|
@ -179,55 +179,18 @@
|
|||
NB: use of "*" dynamic fields will disable field type guessing and adding
|
||||
unknown fields to the schema. -->
|
||||
<!--dynamicField name="*" type="ignored" multiValued="true" /-->
|
||||
|
||||
|
||||
|
||||
<!-- Field to use to determine and enforce document uniqueness.
|
||||
<!-- Field to use to determine and enforce document uniqueness.
|
||||
Unless this field is marked with required="false", it will be a required field
|
||||
-->
|
||||
<uniqueKey>id</uniqueKey>
|
||||
-->
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
<!-- copyField commands copy one field to another at the time a document
|
||||
<!-- copyField commands copy one field to another at the time a document
|
||||
is added to the index. It's used either to index the same field differently,
|
||||
or to add multiple fields to the same field for easier/faster searching.
|
||||
|
||||
<copyField source="cat" dest="text"/>
|
||||
<copyField source="name" dest="text"/>
|
||||
<copyField source="manu" dest="text"/>
|
||||
<copyField source="features" dest="text"/>
|
||||
<copyField source="includes" dest="text"/>
|
||||
<copyField source="manu" dest="manu_exact"/>
|
||||
-->
|
||||
|
||||
<!-- Copy the price into a currency enabled field (default USD)
|
||||
<copyField source="price" dest="price_c"/>
|
||||
-->
|
||||
|
||||
<!-- Text fields from SolrCell to search by default in our catch-all field
|
||||
<copyField source="title" dest="text"/>
|
||||
<copyField source="author" dest="text"/>
|
||||
<copyField source="description" dest="text"/>
|
||||
<copyField source="keywords" dest="text"/>
|
||||
<copyField source="content" dest="text"/>
|
||||
<copyField source="content_type" dest="text"/>
|
||||
<copyField source="resourcename" dest="text"/>
|
||||
<copyField source="url" dest="text"/>
|
||||
-->
|
||||
|
||||
<!-- Create a string version of author for faceting
|
||||
<copyField source="author" dest="author_s"/>
|
||||
-->
|
||||
|
||||
<!-- Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same
|
||||
destination field is to use the dynamic field syntax.
|
||||
copyField also supports a maxChars to copy setting. -->
|
||||
|
||||
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
|
||||
|
||||
<!-- copy name to alphaNameSort, a field designed for sorting by name -->
|
||||
<!-- <copyField source="name" dest="alphaNameSort"/> -->
|
||||
|
||||
<copyField source="sourceFieldName" dest="destinationFieldName"/>
|
||||
-->
|
||||
|
||||
<!-- field type definitions. The "name" attribute is
|
||||
just a label to be used by field definitions. The "class"
|
||||
|
@ -369,7 +332,7 @@
|
|||
-->
|
||||
|
||||
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
|
||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -378,9 +341,10 @@
|
|||
|
||||
<!-- A general text field that has reasonable, generic
|
||||
cross-language defaults: it tokenizes with StandardTokenizer,
|
||||
removes stop words from case-insensitive "stopwords.txt"
|
||||
(empty by default), and down cases. At query time only, it
|
||||
also applies synonyms. -->
|
||||
removes stop words from case-insensitive "stopwords.txt"
|
||||
(empty by default), and down cases. At query time only, it
|
||||
also applies synonyms.
|
||||
-->
|
||||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -421,7 +385,7 @@
|
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
-->
|
||||
-->
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
|
@ -430,26 +394,26 @@
|
|||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
-->
|
||||
-->
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
document "WiFi" or "wi-fi".
|
||||
-->
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
document "WiFi" or "wi-fi".
|
||||
-->
|
||||
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
|
||||
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
|
@ -462,7 +426,7 @@
|
|||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -474,7 +438,7 @@
|
|||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="lang/stopwords_en.txt"
|
||||
/>
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
|
@ -501,9 +465,10 @@
|
|||
</fieldType>
|
||||
|
||||
<!-- Just like text_general except it reverses the characters of
|
||||
each token, to enable more efficient leading wildcard queries. -->
|
||||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
|
||||
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
||||
each token, to enable more efficient leading wildcard queries.
|
||||
-->
|
||||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
|
||||
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
||||
|
@ -519,8 +484,8 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
|
||||
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
|
||||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
|
||||
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||
|
@ -540,8 +505,8 @@
|
|||
Example of using PathHierarchyTokenizerFactory at index time, so
|
||||
queries for paths match documents at that path, or in descendent paths
|
||||
-->
|
||||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
|
||||
<fieldType name="descendent_path" class="solr.TextField">
|
||||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
|
||||
<fieldType name="descendent_path" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
||||
</analyzer>
|
||||
|
@ -549,7 +514,8 @@
|
|||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<!--
|
||||
|
||||
<!--
|
||||
Example of using PathHierarchyTokenizerFactory at query time, so
|
||||
queries for paths match documents at that path, or in ancestor paths
|
||||
-->
|
||||
|
@ -578,8 +544,8 @@
|
|||
The subFields are an implementation detail of the fieldType, and end
|
||||
users normally should not need to know about them.
|
||||
-->
|
||||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/>
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/>
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
|
||||
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
|
@ -601,7 +567,7 @@
|
|||
solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
|
||||
ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
|
||||
refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
|
||||
-->
|
||||
-->
|
||||
<fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
|
||||
|
||||
|
||||
|
@ -1022,12 +988,12 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Similarity is the scoring routine for each document vs. a query.
|
||||
<!-- Similarity is the scoring routine for each document vs. a query.
|
||||
A custom Similarity or SimilarityFactory may be specified here, but
|
||||
the default is fine for most applications.
|
||||
For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
|
||||
-->
|
||||
<!--
|
||||
<!--
|
||||
<similarity class="com.example.solr.CustomSimilarityFactory">
|
||||
<str name="paramkey">param value</str>
|
||||
</similarity>
|
||||
|
|
|
@ -1066,7 +1066,7 @@
|
|||
<!-- a spellchecker built from a field of the main index -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">text</str>
|
||||
<str name="field">_text_</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||
<str name="distanceMeasure">internal</str>
|
||||
|
@ -1088,6 +1088,7 @@
|
|||
</lst>
|
||||
|
||||
<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">wordbreak</str>
|
||||
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
||||
|
@ -1096,44 +1097,7 @@
|
|||
<str name="breakWords">true</str>
|
||||
<int name="maxChanges">10</int>
|
||||
</lst>
|
||||
|
||||
<!-- a spellchecker that uses a different distance measure -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">jarowinkler</str>
|
||||
<str name="field">spell</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="distanceMeasure">
|
||||
org.apache.lucene.search.spell.JaroWinklerDistance
|
||||
</str>
|
||||
</lst>
|
||||
-->
|
||||
|
||||
<!-- a spellchecker that use an alternate comparator
|
||||
|
||||
comparatorClass be one of:
|
||||
1. score (default)
|
||||
2. freq (Frequency first, then score)
|
||||
3. A fully qualified class name
|
||||
-->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">freq</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="comparatorClass">freq</str>
|
||||
-->
|
||||
|
||||
<!-- A spellchecker that reads the list of words from a file -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="classname">solr.FileBasedSpellChecker</str>
|
||||
<str name="name">file</str>
|
||||
<str name="sourceLocation">spellings.txt</str>
|
||||
<str name="characterEncoding">UTF-8</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFile</str>
|
||||
</lst>
|
||||
-->
|
||||
-->
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the spellcheck component.
|
||||
|
@ -1156,7 +1120,6 @@
|
|||
collations (re-written queries) can include a combination of
|
||||
corrections from both spellcheckers -->
|
||||
<str name="spellcheck.dictionary">default</str>
|
||||
<str name="spellcheck.dictionary">wordbreak</str>
|
||||
<str name="spellcheck">on</str>
|
||||
<str name="spellcheck.extendedResults">true</str>
|
||||
<str name="spellcheck.count">10</str>
|
||||
|
|
Loading…
Reference in New Issue