Cleanup the data_driven configset

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1718294 13f79535-47bb-0310-9956-ffa450edef68
2015-12-07 10:32:26 +00:00 · 2015-12-07 10:32:26 +00:00 · 72501bd0bf
parent 6b2097090c
commit 72501bd0bf
3 changed files with 52 additions and 120 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -249,6 +249,9 @@ Other Changes
  are enabled by default and the schema is mutable. The schema file will be called managed-schema
  (Uwe Schindler, shalin, Varun Thacker)

+* SOLR-8381: Cleanup data_driven managed-schema and solrconfig.xml files. Commented out copyFields are removed
+  and solrconfig.xml doesn't refer to field which are not defined.
+
 ==================  5.4.0 ==================

 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
--- a/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema
+++ b/solr/server/solr/configsets/data_driven_schema_configs/conf/managed-schema
@ -46,7 +46,7 @@
 -->

 <schema name="example-data-driven-schema" version="1.5">
-  <!-- attribute "name" is the name of this schema and is only used for display purposes.
+    <!-- attribute "name" is the name of this schema and is only used for display purposes.
       version="x.y" is Solr's version number for the schema syntax and 
       semantics.  It should not normally be changed by applications.

@ -61,7 +61,7 @@
            to off for version >= 1.4
       1.5: omitNorms defaults to true for primitive field types 
            (int, float, boolean, string...)
-     -->
+    -->

    <!-- Valid attributes for fields:
     name: mandatory - the name for the field
@ -96,14 +96,14 @@
       value does not exist
     default: a value that should be used if no value is specified
       when adding a document.
-   -->
+    -->

    <!-- field names should consist of alphanumeric or underscore characters only and
      not start with a digit.  This is not currently strictly enforced,
      but other field names will not have first class support from all components
      and back compatibility is not guaranteed.  Names with both leading and
      trailing underscores (e.g. _version_) are reserved.
-   -->
+    -->

    <!-- In this data_driven_schema_configs configset, only three fields are pre-declared: 
         id, _version_, and _text_.  All other fields will be type guessed and added via the
@ -113,9 +113,9 @@
         Note that many dynamic fields are also defined - you can use them to specify a 
         field's type via field naming conventions - see below.
  
-  WARNING: The _text_ catch-all field will significantly increase your index size.
-           If you don't need it, consider removing it and the corresponding copyField directive.
-      -->
+         WARNING: The _text_ catch-all field will significantly increase your index size.
+         If you don't need it, consider removing it and the corresponding copyField directive.
+    -->
    <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="_root_" type="string" indexed="true" stored="false"/>
@ -179,55 +179,18 @@
        NB: use of "*" dynamic fields will disable field type guessing and adding
        unknown fields to the schema. --> 
    <!--dynamicField name="*" type="ignored" multiValued="true" /-->
-   

-
-  <!-- Field to use to determine and enforce document uniqueness. 
+    <!-- Field to use to determine and enforce document uniqueness.
      Unless this field is marked with required="false", it will be a required field
-   -->
-  <uniqueKey>id</uniqueKey>
+    -->
+    <uniqueKey>id</uniqueKey>

-  <!-- copyField commands copy one field to another at the time a document
+    <!-- copyField commands copy one field to another at the time a document
       is added to the index.  It's used either to index the same field differently,
       or to add multiple fields to the same field for easier/faster searching.

-   <copyField source="cat" dest="text"/>
-   <copyField source="name" dest="text"/>
-   <copyField source="manu" dest="text"/>
-   <copyField source="features" dest="text"/>
-   <copyField source="includes" dest="text"/>
-   <copyField source="manu" dest="manu_exact"/>
-   -->
-
-  <!-- Copy the price into a currency enabled field (default USD)
-   <copyField source="price" dest="price_c"/>
-   -->
-
-  <!-- Text fields from SolrCell to search by default in our catch-all field
-   <copyField source="title" dest="text"/>
-   <copyField source="author" dest="text"/>
-   <copyField source="description" dest="text"/>
-   <copyField source="keywords" dest="text"/>
-   <copyField source="content" dest="text"/>
-   <copyField source="content_type" dest="text"/>
-   <copyField source="resourcename" dest="text"/>
-   <copyField source="url" dest="text"/>
-   -->
-
-  <!-- Create a string version of author for faceting
-   <copyField source="author" dest="author_s"/>
-   -->
-	
-  <!-- Above, multiple source fields are copied to the [text] field. 
-	  Another way to map multiple source fields to the same 
-	  destination field is to use the dynamic field syntax. 
-	  copyField also supports a maxChars to copy setting.  -->
-	   
-  <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
-
-  <!-- copy name to alphaNameSort, a field designed for sorting by name -->
-  <!-- <copyField source="name" dest="alphaNameSort"/> -->
- 
+    <copyField source="sourceFieldName" dest="destinationFieldName"/>
+    -->

    <!-- field type definitions. The "name" attribute is
       just a label to be used by field definitions.  The "class"
@ -369,7 +332,7 @@
    -->

    <!-- A text field that only splits on whitespace for exact matching of words -->
-  <dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/>
+    <dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/>
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@ -378,9 +341,10 @@

    <!-- A general text field that has reasonable, generic
         cross-language defaults: it tokenizes with StandardTokenizer,
-	 removes stop words from case-insensitive "stopwords.txt"
-	 (empty by default), and down cases.  At query time only, it
-	 also applies synonyms. -->
+	       removes stop words from case-insensitive "stopwords.txt"
+	       (empty by default), and down cases.  At query time only, it
+	       also applies synonyms.
+	  -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
@ -421,7 +385,7 @@
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
-	-->
+	      -->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
@ -430,26 +394,26 @@
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
-            />
+        />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
-	-->
+	      -->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- A text field with defaults appropriate for English, plus
-	 aggressive word-splitting and autophrase features enabled.
-	 This field is just like text_en, except it adds
-	 WordDelimiterFilter to enable splitting and matching of
-	 words on case-change, alpha numeric boundaries, and
-	 non-alphanumeric chars.  This means certain compound word
-	 cases will work, for example query "wi fi" will match
-	 document "WiFi" or "wi-fi".
-        -->
+         aggressive word-splitting and autophrase features enabled.
+         This field is just like text_en, except it adds
+         WordDelimiterFilter to enable splitting and matching of
+         words on case-change, alpha numeric boundaries, and
+         non-alphanumeric chars.  This means certain compound word
+         cases will work, for example query "wi fi" will match
+         document "WiFi" or "wi-fi".
+    -->
    <dynamicField name="*_txt_en_split" type="text_en_splitting"  indexed="true"  stored="true"/>
    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
@ -462,7 +426,7 @@
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
-            />
+        />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
@ -474,7 +438,7 @@
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
-            />
+        />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
@ -501,9 +465,10 @@
    </fieldType>

    <!-- Just like text_general except it reverses the characters of
-	 each token, to enable more efficient leading wildcard queries. -->
-  <dynamicField name="*_txt_rev" type="text_general_rev"  indexed="true"  stored="true"/>
-  <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+	       each token, to enable more efficient leading wildcard queries.
+    -->
+    <dynamicField name="*_txt_rev" type="text_general_rev"  indexed="true"  stored="true"/>
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
@ -519,8 +484,8 @@
      </analyzer>
    </fieldType>

-  <dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/>
-  <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
+    <dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/>
+    <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
@ -540,8 +505,8 @@
      Example of using PathHierarchyTokenizerFactory at index time, so
      queries for paths match documents at that path, or in descendent paths
    -->
-  <dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/>
-  <fieldType name="descendent_path" class="solr.TextField">
+    <dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/>
+    <fieldType name="descendent_path" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
      </analyzer>
@ -549,7 +514,8 @@
        <tokenizer class="solr.KeywordTokenizerFactory" />
      </analyzer>
    </fieldType>
-    <!-- 
+
+    <!--
      Example of using PathHierarchyTokenizerFactory at query time, so
      queries for paths match documents at that path, or in ancestor paths
    -->
@ -578,8 +544,8 @@
      The subFields are an implementation detail of the fieldType, and end
      users normally should not need to know about them.
     -->
-  <dynamicField name="*_point" type="point"  indexed="true"  stored="true"/>
-  <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+    <dynamicField name="*_point" type="point"  indexed="true"  stored="true"/>
+    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
@ -601,7 +567,7 @@
                           solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
                             ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
                             refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
-   -->
+    -->
    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
             

@ -1022,12 +988,12 @@
      </analyzer>
    </fieldType>

-  <!-- Similarity is the scoring routine for each document vs. a query.
+    <!-- Similarity is the scoring routine for each document vs. a query.
       A custom Similarity or SimilarityFactory may be specified here, but 
       the default is fine for most applications.  
       For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
    -->
-  <!--
+    <!--
     <similarity class="com.example.solr.CustomSimilarityFactory">
       <str name="paramkey">param value</str>
     </similarity>
--- a/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
+++ b/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
@ -1066,7 +1066,7 @@
    <!-- a spellchecker built from a field of the main index -->
    <lst name="spellchecker">
      <str name="name">default</str>
-      <str name="field">text</str>
+      <str name="field">_text_</str>
      <str name="classname">solr.DirectSolrSpellChecker</str>
      <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
      <str name="distanceMeasure">internal</str>
@ -1088,6 +1088,7 @@
    </lst>

    <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
+    <!--
    <lst name="spellchecker">
      <str name="name">wordbreak</str>
      <str name="classname">solr.WordBreakSolrSpellChecker</str>
@ -1096,44 +1097,7 @@
      <str name="breakWords">true</str>
      <int name="maxChanges">10</int>
    </lst>
-
-    <!-- a spellchecker that uses a different distance measure -->
-    <!--
-       <lst name="spellchecker">
-         <str name="name">jarowinkler</str>
-         <str name="field">spell</str>
-         <str name="classname">solr.DirectSolrSpellChecker</str>
-         <str name="distanceMeasure">
-           org.apache.lucene.search.spell.JaroWinklerDistance
-         </str>
-       </lst>
-     -->
-
-    <!-- a spellchecker that use an alternate comparator 
-
-         comparatorClass be one of:
-          1. score (default)
-          2. freq (Frequency first, then score)
-          3. A fully qualified class name
-      -->
-    <!--
-       <lst name="spellchecker">
-         <str name="name">freq</str>
-         <str name="field">lowerfilt</str>
-         <str name="classname">solr.DirectSolrSpellChecker</str>
-         <str name="comparatorClass">freq</str>
-      -->
-
-    <!-- A spellchecker that reads the list of words from a file -->
-    <!--
-       <lst name="spellchecker">
-         <str name="classname">solr.FileBasedSpellChecker</str>
-         <str name="name">file</str>
-         <str name="sourceLocation">spellings.txt</str>
-         <str name="characterEncoding">UTF-8</str>
-         <str name="spellcheckIndexDir">spellcheckerFile</str>
-       </lst>
-      -->
+    -->
  </searchComponent>

  <!-- A request handler for demonstrating the spellcheck component.  
@ -1156,7 +1120,6 @@
           collations (re-written queries) can include a combination of
           corrections from both spellcheckers -->
      <str name="spellcheck.dictionary">default</str>
-      <str name="spellcheck.dictionary">wordbreak</str>
      <str name="spellcheck">on</str>
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.count">10</str>