example config

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@377065 13f79535-47bb-0310-9956-ffa450edef68
Author: Yonik Seeley
Date:   2006-02-11 21:35:33 +00:00
Parent: d4ae734c63
Commit: 389c5e6125
5 changed files with 380 additions and 0 deletions

@@ -0,0 +1,7 @@
#use a protected word file to protect against the stemmer reducing two
#unrelated words to the same base word.
#some test non-words that would normally be stemmed:
dontstems
zwhacky
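#for illustration only (an aside, not required by Solr): the English Porter
#stemmer would normally reduce these to roughly "dontstem" and "zwhacki",
#so listing them here keeps the original tokens intact at index and query time.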

example/conf/schema.xml (new executable file)

@@ -0,0 +1,172 @@
<?xml version="1.0" ?>
<!-- The Solr schema file. This file should be named "schema.xml" and
should be in the conf directory or located where the classloader
for the Solr webapp can find it. -->
<schema name="example" version="1.1">
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldtype. -->
<!-- The StringField type is not analyzed, but indexed/stored verbatim -->
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- boolean type: "true" or "false" -->
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<!-- The optional sortMissingLast and sortMissingFirst attributes are
currently supported on types that are sorted internally as strings.
- If sortMissingLast="true" then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
- If sortMissingFirst="true" then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used, which places docs without the field
first in an ascending sort and last in a descending sort.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't support range queries since the
lexicographic ordering isn't equal to the numeric ordering) -->
<fieldtype name="integer" class="solr.IntField"/>
<fieldtype name="long" class="solr.LongField"/>
<fieldtype name="float" class="solr.FloatField"/>
<fieldtype name="double" class="solr.DoubleField"/>
<!-- Numeric field types that manipulate the value into
a string value that isn't human-readable in its internal form,
but with a lexicographic ordering the same as the numeric ordering
so that range queries correctly work. -->
<fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true"/>
<fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true"/>
<fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true"/>
<fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true"/>
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory. -->
<fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
-->
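<!-- Sketch of why the gap matters (hypothetical data): if a multiValued field of such a
type holds the values "foo bar" and "baz qux", a positionIncrementGap of 10 keeps a
phrase query for "bar baz" from matching across the value boundary, because the
positions of "bar" and "baz" end up roughly the gap apart instead of adjacent. -->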
<!-- Standard analyzer commonly used by Lucene developers -->
<fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="10">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<!-- A text field that only splits on whitespace for more exact matching -->
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="10">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldtype>
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, letter-number boundaries, and non-alphanumeric chars
so that a query of "wifi" or "wi fi" could match a document containing Wi-Fi.
Synonyms and stopwords are customized by external files, and stemming is enabled -->
<fieldtype name="text" class="solr.TextField" positionIncrementGap="10">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
</types>
<fields>
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the <types> section
indexed: true if this field should be indexed (searchable)
stored: true if this field should be retrievable
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with this field
(this disables length normalization and index-time boosting for the field)
-->
<field name="id" type="string" indexed="true" stored="true"/>
<field name="date" type="date" indexed="true" stored="true"/>
<field name="title" type="text" indexed="true" stored="true"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="body" type="text" indexed="true" stored="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema) -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal-size patterns
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
</fields>
<!-- field to use to determine document uniqueness... used when
overwriting one document with another -->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field in different
ways, or to add multiple fields to the same field for easier/faster searching. -->
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="body" dest="text"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications. -->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
</schema>

example/conf/solrconfig.xml (new executable file)

@@ -0,0 +1,193 @@
<?xml version="1.0" ?>
<config>
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data
If replication is in use, this should match the replication configuration. -->
<!--
<dataDir>data</dataDir>
-->
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- these are global... can't currently override per index -->
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
</indexDefaults>
<mainIndex>
<!-- options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- If true, unlock any held write or commit locks on startup.
This defeats the locking mechanism that allows multiple
processes to safely access a lucene index, and should be
used with care. -->
<unlockOnStartup>false</unlockOnStartup>
</mainIndex>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">snapshooter</str>
<str name="dir">bin</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or prefix queries that expand to big boolean
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For LRUCache,
the autowarmed items will be the most recently accessed items. -->
<filterCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
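<!-- Rough usage sketch; the method names come from the comment above, but the exact
signatures are an assumption: custom code holding the current SolrIndexSearcher might
call searcher.cacheInsert("myUserCache", key, value) to populate the cache and
searcher.cacheLookup("myUserCache", key) to read it back on later requests. -->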
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that. -->
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryResultWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>10</queryResultWindowSize>
<!-- This entry enables an int hash representation for filters (DocSets)
when the number of items in the set is less than maxSize. For smaller
sets, this representation is more memory efficient, more efficient to
iterate over, and faster to take intersections. -->
<HashDocSet maxSize="3000" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index) -->
<boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler" />
<!-- example of a request handler with custom parameters passed to it's init()
<requestHandler name="example" class="myorg.mypkg.MyRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
-->
<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
<gettableFiles>solrconfig.xml schema.xml</gettableFiles>
<pingQuery>q=id:0&amp;start=0&amp;rows=0</pingQuery>
</admin>
</config>

@@ -0,0 +1,2 @@
stopworda
stopwordb

@@ -0,0 +1,6 @@
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
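#A rough reading of the mappings above (an aside; this assumes SynonymFilterFactory
#skips "#" comment lines the way the other example files do): "a => b" rewrites the
#left-hand token(s) to the right-hand token(s) at analysis time, commas separate
#alternative synonyms, and "\" escapes "=>" or "," inside a term. For example, under
#these rules "ccc" becomes "cccc1" or "cccc2", and with expand="false" (as in the
#schema) the bare list on the last line collapses "baraaa" and "bazaaa" to "fooaaa".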