mirror of https://github.com/apache/lucene.git
example config update
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@381523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e0df5763c1
commit
44c19f4a4b
|
@ -108,7 +108,9 @@
|
||||||
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
|
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
|
||||||
<analyzer type="index">
|
<analyzer type="index">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
<!-- in this example, we will only use synonyms at query time
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
-->
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
|
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
@ -116,7 +118,7 @@
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<analyzer type="query">
|
<analyzer type="query">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
|
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
|
|
@ -1,2 +1,41 @@
|
||||||
|
#a couple of test stopwords to test that the words are really being configured
|
||||||
|
#from this file:
|
||||||
stopworda
|
stopworda
|
||||||
stopwordb
|
stopwordb
|
||||||
|
|
||||||
|
#Standard English stop words taken from Lucene's StopAnalyzer
|
||||||
|
an
|
||||||
|
and
|
||||||
|
are
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
but
|
||||||
|
by
|
||||||
|
for
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
it
|
||||||
|
no
|
||||||
|
not
|
||||||
|
of
|
||||||
|
on
|
||||||
|
or
|
||||||
|
s
|
||||||
|
such
|
||||||
|
t
|
||||||
|
that
|
||||||
|
the
|
||||||
|
their
|
||||||
|
then
|
||||||
|
there
|
||||||
|
these
|
||||||
|
they
|
||||||
|
this
|
||||||
|
to
|
||||||
|
was
|
||||||
|
will
|
||||||
|
with
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,18 @@
|
||||||
|
|
||||||
|
#some test synonym mappings for things unlikely to appear in input text
|
||||||
aaa => aaaa
|
aaa => aaaa
|
||||||
bbb => bbbb1 bbbb2
|
bbb => bbbb1 bbbb2
|
||||||
ccc => cccc1,cccc2
|
ccc => cccc1,cccc2
|
||||||
a\=>a => b\=>b
|
a\=>a => b\=>b
|
||||||
a\,a => b\,b
|
a\,a => b\,b
|
||||||
fooaaa,baraaa,bazaaa
|
fooaaa,baraaa,bazaaa
|
||||||
|
|
||||||
|
#synonyms, for our purposes
|
||||||
|
GB,gib,gigabyte,gigabytes
|
||||||
|
MB,mib,megabyte,megabytes
|
||||||
|
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||||
|
#after us won't split it into two words.
|
||||||
|
|
||||||
|
#spelling correction
|
||||||
|
pixima => pixma
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<add><doc>
|
<add><doc>
|
||||||
<field name="id">9885A004</field>
|
<field name="id">9885A004</field>
|
||||||
<field name="name">Canon Powershot SD500</field>
|
<field name="name">Canon PowerShot SD500</field>
|
||||||
<field name="manu">Canon Inc.</field>
|
<field name="manu">Canon Inc.</field>
|
||||||
<field name="cat">electronics</field>
|
<field name="cat">electronics</field>
|
||||||
<field name="cat">camera</field>
|
<field name="cat">camera</field>
|
||||||
|
|
Loading…
Reference in New Issue