example config update

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@381523 13f79535-47bb-0310-9956-ffa450edef68
2006-02-28 01:07:50 +00:00 · 2006-02-28 01:07:50 +00:00 · 44c19f4a4b
parent e0df5763c1
commit 44c19f4a4b
4 changed files with 56 additions and 3 deletions
--- a/example/conf/schema.xml
+++ b/example/conf/schema.xml
@ -108,7 +108,9 @@
    <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-          <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+          <!-- in this example, we will only use synonyms at query time
+          <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+          -->
          <filter class="solr.StopFilterFactory" ignoreCase="true"/>
          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
          <filter class="solr.LowerCaseFilterFactory"/>
@ -116,7 +118,7 @@
      </analyzer>
      <analyzer type="query">
          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-          <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+          <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
          <filter class="solr.StopFilterFactory" ignoreCase="true"/>
          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
          <filter class="solr.LowerCaseFilterFactory"/>
--- a/example/conf/stopwords.txt
+++ b/example/conf/stopwords.txt
@ -1,2 +1,41 @@
+#a couple of test stopwords to test that the words are really being configured
+#from this file:
 stopworda
 stopwordb
+
+#Standard english stop words taken from Lucene's StopAnalyzer
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
--- a/example/conf/synonyms.txt
+++ b/example/conf/synonyms.txt
@ -1,6 +1,18 @@
+
+#some test synonym mappings for things unlikely to appear in input text
 aaa => aaaa
 bbb => bbbb1 bbbb2
 ccc => cccc1,cccc2
 a\=>a => b\=>b
 a\,a => b\,b
 fooaaa,baraaa,bazaaa
+
+#synonyms, for our purposes
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+#spelling correction
+pixima => pixma
+
--- a/example/exampledocs/sd500.xml
+++ b/example/exampledocs/sd500.xml
@ -1,6 +1,6 @@
 <add><doc>
  <field name="id">9885A004</field>
-  <field name="name">Canon Powershot SD500</field>
+  <field name="name">Canon PowerShot SD500</field>
  <field name="manu">Canon Inc.</field>
  <field name="cat">electronics</field>
  <field name="cat">camera</field>