example config update

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@381523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-02-28 01:07:50 +00:00
parent e0df5763c1
commit 44c19f4a4b
4 changed files with 56 additions and 3 deletions

View File

@ -108,7 +108,9 @@
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100"> <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<filter class="solr.StopFilterFactory" ignoreCase="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
@ -116,7 +118,7 @@
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>

View File

@ -1,2 +1,41 @@
#a couple of test stopwords to test that the words are really being configured
#from this file:
stopworda stopworda
stopwordb stopwordb
#Standard English stop words taken from Lucene's StopAnalyzer
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

View File

@ -1,6 +1,18 @@
#some test synonym mappings for things unlikely to appear in input text
aaa => aaaa aaa => aaaa
bbb => bbbb1 bbbb2 bbb => bbbb1 bbbb2
ccc => cccc1,cccc2 ccc => cccc1,cccc2
a\=>a => b\=>b a\=>a => b\=>b
a\,a => b\,b a\,a => b\,b
fooaaa,baraaa,bazaaa fooaaa,baraaa,bazaaa
#synonyms, for our purposes
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
#spelling correction
pixima => pixma

View File

@ -1,6 +1,6 @@
<add><doc> <add><doc>
<field name="id">9885A004</field> <field name="id">9885A004</field>
<field name="name">Canon Powershot SD500</field> <field name="name">Canon PowerShot SD500</field>
<field name="manu">Canon Inc.</field> <field name="manu">Canon Inc.</field>
<field name="cat">electronics</field> <field name="cat">electronics</field>
<field name="cat">camera</field> <field name="cat">camera</field>