mirror of https://github.com/apache/lucene.git
SOLR-284: improve example server for extracting request handler
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@814378 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4060aac850
commit
6acf11c821
|
@ -275,6 +275,32 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- A general unstemmed text field that indexes tokens normally and also
|
||||
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
||||
leading wildcard queries. -->
|
||||
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="false" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- charFilter + WhitespaceTokenizer -->
|
||||
<!--
|
||||
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
|
||||
|
@ -401,12 +427,31 @@
|
|||
<field name="inStock" type="boolean" indexed="true" stored="true" />
|
||||
|
||||
|
||||
<field name="title" type="text" indexed="true" stored="true"/>
|
||||
<!-- Common metadata fields, named specifically to match up with
|
||||
SolrCell metadata when parsing rich documents such as Word, PDF.
|
||||
Some fields are multiValued only because Tika currently may return
|
||||
multiple values for them.
|
||||
-->
|
||||
<field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="subject" type="text" indexed="true" stored="true"/>
|
||||
<field name="description" type="text" indexed="true" stored="true"/>
|
||||
<field name="comments" type="text" indexed="true" stored="true"/>
|
||||
<field name="author" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="keywords" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="category" type="textgen" indexed="true" stored="true"/>
|
||||
<field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="last_modified" type="date" indexed="true" stored="true"/>
|
||||
<field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
via copyField further on in this schema -->
|
||||
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
||||
leading wildcard queries. -->
|
||||
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<!-- non-tokenized version of manufacturer to make it easier to sort or group
|
||||
results by manufacturer. copied from "manu" via copyField -->
|
||||
<field name="manu_exact" type="string" indexed="true" stored="false"/>
|
||||
|
|
|
@ -655,10 +655,19 @@
|
|||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
|
||||
<requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="uprefix">ignored_</str>
|
||||
<!-- All the main content goes into "text"... if you need to return
|
||||
the extracted text or do highlighting, use a stored field. -->
|
||||
<str name="map.content">text</str>
|
||||
<str name="lowernames">true</str>
|
||||
<str name="uprefix">ignored_</str>
|
||||
|
||||
<!-- capture link hrefs but ignore div attributes -->
|
||||
<str name="captureAttr">true</str>
|
||||
<str name="map.a">links</str>
|
||||
<str name="map.div">ignored_</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
|
Loading…
Reference in New Issue