use HTMLStripCharFilterFactory instead of HTMLStripStandardTokenizerFactory in example-DIH schema

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@950950 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2010-06-03 10:51:46 +00:00
parent 428d4b1633
commit 729a4dc431
1 changed files with 4 additions and 2 deletions

View File

@ -239,7 +239,8 @@
<fieldtype name="html" stored="true" indexed="true" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.HTMLStripStandardTokenizerFactory"/>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@ -251,7 +252,8 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.HTMLStripStandardTokenizerFactory"/>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>