SOLR-1142: faster example schema

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@802443 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-08-08 20:08:16 +00:00
parent b4acfc32df
commit 8a3969fb7b
7 changed files with 133 additions and 170 deletions
example
src/test/org/apache/solr/client/solrj/response

View File

@ -28,6 +28,7 @@
<field name="price">19.95</field> <field name="price">19.95</field>
<field name="popularity">1</field> <field name="popularity">1</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<field name="timestamp">2005-08-01T16:30:25Z</field>
</doc> </doc>
<doc> <doc>
@ -41,6 +42,7 @@
<field name="price">11.50</field> <field name="price">11.50</field>
<field name="popularity">1</field> <field name="popularity">1</field>
<field name="inStock">false</field> <field name="inStock">false</field>
<field name="timestamp">2006-02-14T23:55:59Z</field>
</doc> </doc>

View File

@ -32,4 +32,5 @@
<field name="price">399.00</field> <field name="price">399.00</field>
<field name="popularity">10</field> <field name="popularity">10</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="timestamp">2005-10-12T08:00:00Z</field>
</doc></add> </doc></add>

View File

@ -26,7 +26,7 @@
<field name="price">185</field> <field name="price">185</field>
<field name="popularity">5</field> <field name="popularity">5</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="catPay">electronics|6.0 memory|3.0</field> <field name="payloads">electronics|6.0 memory|3.0</field>
</doc> </doc>
<doc> <doc>
@ -38,7 +38,7 @@
<field name="price">74.99</field> <field name="price">74.99</field>
<field name="popularity">7</field> <field name="popularity">7</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="catPay">electronics|4.0 memory|2.0</field> <field name="payloads">electronics|4.0 memory|2.0</field>
</doc> </doc>
<doc> <doc>
@ -51,7 +51,7 @@
<!-- note: price is missing on this one --> <!-- note: price is missing on this one -->
<field name="popularity">5</field> <field name="popularity">5</field>
<field name="inStock">true</field> <field name="inStock">true</field>
<field name="catPay">electronics|0.9 memory|0.1</field> <field name="payloads">electronics|0.9 memory|0.1</field>
</doc> </doc>
</add> </add>

View File

@ -1,58 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<add>
<doc>
<field name="id">1</field>
<field name="word">ipod</field>
</doc>
<doc>
<field name="id">2</field>
<field name="word">ipod nano</field>
</doc>
<doc>
<field name="id">3</field>
<field name="word">ipod video</field>
</doc>
<doc>
<field name="id">4</field>
<field name="word">ipod shuffle</field>
</doc>
<doc>
<field name="id">5</field>
<field name="word">wii</field>
</doc>
<doc>
<field name="id">6</field>
<field name="word">blackberry</field>
</doc>
<doc>
<field name="id">7</field>
<field name="word">blackjack</field>
</doc>
<doc>
<field name="id">8</field>
<field name="word">creative</field>
</doc>
<doc>
<field name="id">9</field>
<field name="word">creative labs</field>
</doc>
<doc>
<field name="id">10</field>
<field name="word">creative zen</field>
</doc>
</add>

View File

@ -28,8 +28,17 @@
For more information, on how to customize this file, please see For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml http://wiki.apache.org/solr/SchemaXml
NOTE: this schema includes many optional features and should not PERFORMANCE NOTE: this schema includes many optional features and should not
be used for benchmarking. be used for benchmarking. To improve performance one could
- set stored="false" for all fields possible (esp large fields) when you
only need to search on the field but don't need to return the original
value.
- set indexed="false" if you don't need to search on the field, but only
return the field as a result of searching on other indexed fields.
- remove all unneeded copyField statements
- for best index size and searching performance, set "index" to false
for all general text fields, use copyField to copy them to the
catchall "text" field, and use that for searching.
--> -->
<schema name="example" version="1.2"> <schema name="example" version="1.2">
@ -191,8 +200,6 @@
words on case-change, alpha numeric boundaries, and non-alphanumeric chars, words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi". so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled. Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
--> -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
@ -212,7 +219,6 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@ -225,7 +231,6 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -240,18 +245,31 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterFilter in conjuncton with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!--
Setup simple analysis for spell checking <!-- A general unstemmed text field - good if one does not know the language of the field -->
--> <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" > <analyzer type="index">
<analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="false" />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -323,7 +341,14 @@
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed, <!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. --> any data added to them will be ignored outright. -->
@ -355,14 +380,12 @@
<field name="id" type="string" indexed="true" stored="true" required="true" /> <field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/> <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
<field name="name" type="text" indexed="true" stored="true"/> <field name="name" type="textgen" indexed="true" stored="true"/>
<field name="nameSort" type="string" indexed="true" stored="false"/>
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/> <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
<field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/> <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" termVectors="true" /> <field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" />
<field name="features" type="text" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/> <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="includes" type="text" indexed="true" stored="true"/> <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
<field name="weight" type="float" indexed="true" stored="true"/> <field name="weight" type="float" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true"/> <field name="price" type="float" indexed="true" stored="true"/>
@ -375,14 +398,6 @@
<field name="title" type="text" indexed="true" stored="true"/> <field name="title" type="text" indexed="true" stored="true"/>
<!-- Some sample docs exists solely to demonstrate the spellchecker
functionality, this is the only field they contain.
Typically you might build the spellchecker off "catchall" type field
containing all of the text in each document.
-->
<field name="word" type="string" indexed="true" stored="true"/>
<!-- catchall field, containing all other searchable text fields (implemented <!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema --> via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/> <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
@ -391,15 +406,14 @@
results by manufacturer. copied from "manu" via copyField --> results by manufacturer. copied from "manu" via copyField -->
<field name="manu_exact" type="string" indexed="true" stored="false"/> <field name="manu_exact" type="string" indexed="true" stored="false"/>
<field name="catPay" type="payloads" indexed="true" stored="true"/> <field name="payloads" type="payloads" indexed="true" stored="true"/>
<!-- Here, default is used to create a "timestamp" field indicating <!-- Here, default is used to create a "timestamp" field indicating
When each document was indexed. When each document was indexed.
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
--> -->
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/> <field name="timestamp" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="spell" type="textSpell" indexed="true" stored="true" multiValued="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields <!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns. will be used if the name matches any of the patterns.
@ -427,7 +441,7 @@
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/> <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="text" indexed="true" stored="true" multiValued="true"/> <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="random_*" type="random" /> <dynamicField name="random_*" type="random" />
@ -453,28 +467,24 @@
<!-- copyField commands copy one field to another at the time a document <!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently, is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching. --> or to add multiple fields to the same field for easier/faster searching. -->
<copyField source="id" dest="sku"/>
<copyField source="incubationdate_dt" dest="incubationdate_s"/>
<copyField source="cat" dest="text"/> <copyField source="cat" dest="text"/>
<copyField source="name" dest="text"/> <copyField source="name" dest="text"/>
<copyField source="name" dest="nameSort"/>
<copyField source="name" dest="alphaNameSort"/>
<copyField source="manu" dest="text"/> <copyField source="manu" dest="text"/>
<copyField source="features" dest="text"/> <copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/> <copyField source="includes" dest="text"/>
<copyField source="manu" dest="manu_exact"/>
<!-- Above, multiple source fields are copied to the [text] field. <!-- Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same Another way to map multiple source fields to the same
destination field is to use the dynamic field syntax. destination field is to use the dynamic field syntax.
copyField also supports a maxChars to copy setting. --> copyField also supports a maxChars to copy setting. -->
<!-- <copyField source="*" dest="text" maxChars="3000"/> --> <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
<copyField source="manu" dest="manu_exact"/> <!-- copy name to alphaNameSort, a field designed for sorting by name -->
<!-- <copyField source="name" dest="alphaNameSort"/> -->
<copyField source="name" dest="spell"/>
<!-- Similarity is the scoring routine for each document vs. a query. <!-- Similarity is the scoring routine for each document vs. a query.
A custom similarity may be specified here, but the default is fine A custom similarity may be specified here, but the default is fine

View File

@ -37,38 +37,36 @@
<useCompoundFile>false</useCompoundFile> <useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor> <mergeFactor>10</mergeFactor>
<!-- <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. based on whichever limit is hit first. -->
-->
<!--<maxBufferedDocs>1000</maxBufferedDocs>--> <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<!-- Tell Lucene when to flush documents to disk.
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first. <!-- Sets the amount of RAM that may be used by Lucene indexing
for buffering added documents and deletions before they are
--> flushed to the Directory. -->
<ramBufferSizeMB>32</ramBufferSizeMB> <ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs> <!-- <maxMergeDocs>2147483647</maxMergeDocs> -->
<maxFieldLength>10000</maxFieldLength> <maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout> <writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout> <commitLockTimeout>10000</commitLockTimeout>
<!-- <!--
Expert: Turn on Lucene's auto commit capability. Expert: Turn on Lucene's auto commit capability. This causes intermediate
This causes intermediate segment flushes to write a new lucene segment flushes to write a new lucene index descriptor, enabling it to be
index descriptor, enabling it to be opened by an external opened by an external IndexReader. This can greatly slow down indexing
IndexReader. speed. NOTE: Despite the name, this value does not have any relation to
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality Solr's autoCommit functionality
--> -->
<!--<luceneAutoCommit>false</luceneAutoCommit>--> <!--<luceneAutoCommit>false</luceneAutoCommit>-->
<!-- <!--
Expert: Expert: The Merge Policy in Lucene controls how merging is handled by
The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
versions used LogDocMergePolicy. versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when LogByteSizeMergePolicy chooses segments to merge based on their size. The
to merge based on number of documents Lucene 2.2 default, LogDocMergePolicy chose when to merge based on number
of documents
Other implementations of MergePolicy must have a no-argument constructor Other implementations of MergePolicy must have a no-argument constructor
--> -->
@ -76,16 +74,17 @@
<!-- <!--
Expert: Expert:
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default) The Merge Scheduler in Lucene controls how merges are performed. The
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not. ConcurrentMergeScheduler (Lucene 2.3 default) can perform merges in the
background using separate threads. The SerialMergeScheduler (Lucene 2.2
default) does not.
--> -->
<!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>--> <!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->
<!-- <!-- To aid in advanced debugging, you may turn on IndexWriter debug logging.
To aid in advanced debugging, you may turn on IndexWriter debug logging. Uncommenting this and setting to true Uncommenting this and setting to true will set the file that the underlying
will set the file that the underlying Lucene IndexWriter will write its debug infostream to. Lucene IndexWriter will write its debug infostream to. -->
--> <!-- <infoStream file="/path/file">false</infoStream> -->
<!-- <infoStream file="/path/file">false</infoStream> -->
<!-- <!--
This option specifies which Lucene LockFactory implementation to use. This option specifies which Lucene LockFactory implementation to use.
@ -93,13 +92,13 @@
single = SingleInstanceLockFactory - suggested for a read-only index single = SingleInstanceLockFactory - suggested for a read-only index
or when there is no possibility of another process trying or when there is no possibility of another process trying
to modify the index. to modify the index.
native = NativeFSLockFactory native = NativeFSLockFactory - uses OS native file locking
simple = SimpleFSLockFactory simple = SimpleFSLockFactory - uses a plain file for locking
(For backwards compatibility with Solr 1.2, 'simple' is the default (For backwards compatibility with Solr 1.2, 'simple' is the default
if not specified.) if not specified.)
--> -->
<lockType>single</lockType> <lockType>native</lockType>
</indexDefaults> </indexDefaults>
<mainIndex> <mainIndex>
@ -109,7 +108,7 @@
<mergeFactor>10</mergeFactor> <mergeFactor>10</mergeFactor>
<!-- Deprecated --> <!-- Deprecated -->
<!--<maxBufferedDocs>1000</maxBufferedDocs>--> <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<maxMergeDocs>2147483647</maxMergeDocs> <!--<maxMergeDocs>2147483647</maxMergeDocs>-->
<maxFieldLength>10000</maxFieldLength> <maxFieldLength>10000</maxFieldLength>
<!-- If true, unlock any held write or commit locks on startup. <!-- If true, unlock any held write or commit locks on startup.
@ -121,8 +120,7 @@
<unlockOnStartup>false</unlockOnStartup> <unlockOnStartup>false</unlockOnStartup>
<!-- If true, IndexReaders will be reopened (often more efficient) instead <!-- If true, IndexReaders will be reopened (often more efficient) instead
of closed and then opened. of closed and then opened. -->
-->
<reopenReaders>true</reopenReaders> <reopenReaders>true</reopenReaders>
<!-- <!--
@ -154,15 +152,15 @@
</mainIndex> </mainIndex>
<!-- Enables JMX if and only if an existing MBeanServer is found, use <!-- Enables JMX if and only if an existing MBeanServer is found, use this
this if you want to configure JMX through JVM parameters. Remove if you want to configure JMX through JVM parameters. Remove this to disable
this to disable exposing Solr configuration and statistics to JMX. exposing Solr configuration and statistics to JMX.
If you want to connect to a particular server, specify the agentId If you want to connect to a particular server, specify the agentId
e.g. <jmx agentId="myAgent" /> e.g. <jmx agentId="myAgent" />
If you want to start a new MBeanServer, specify the serviceUrl If you want to start a new MBeanServer, specify the serviceUrl
e.g <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> e.g <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
For more details see http://wiki.apache.org/solr/SolrJmx For more details see http://wiki.apache.org/solr/SolrJmx
--> -->
@ -170,7 +168,6 @@
<!-- the default high-performance update handler --> <!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2"> <updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that <!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis) org.apache.solr.(search|update|request|core|analysis)
@ -179,6 +176,8 @@
<!-- Perform a <commit/> automatically under certain conditions: <!-- Perform a <commit/> automatically under certain conditions:
maxDocs - number of updates since last commit is greater than this maxDocs - number of updates since last commit is greater than this
maxTime - oldest uncommited update (in ms) is this long ago maxTime - oldest uncommited update (in ms) is this long ago
Instead of enabling autoCommit, consider using "commitWithin"
when adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
<autoCommit> <autoCommit>
<maxDocs>10000</maxDocs> <maxDocs>10000</maxDocs>
<maxTime>1000</maxTime> <maxTime>1000</maxTime>
@ -252,7 +251,7 @@
class="solr.FastLRUCache" class="solr.FastLRUCache"
size="512" size="512"
initialSize="512" initialSize="512"
autowarmCount="128"/> autowarmCount="0"/>
<!-- Cache used to hold field values that are quickly accessible <!-- Cache used to hold field values that are quickly accessible
by document id. The fieldValueCache is created by default by document id. The fieldValueCache is created by default
@ -272,7 +271,7 @@
class="solr.LRUCache" class="solr.LRUCache"
size="512" size="512"
initialSize="512" initialSize="512"
autowarmCount="32"/> autowarmCount="0"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document). <!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. --> Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
@ -283,10 +282,9 @@
autowarmCount="0"/> autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily. <!-- If true, stored fields that are not requested will be loaded lazily.
This can result in a significant speed improvement if the usual case is to
This can result in a significant speed improvement if the usual case is to not load all stored fields, especially if the skipped fields are large
not load all stored fields, especially if the skipped fields are large compressed compressed text fields.
text fields.
--> -->
<enableLazyFieldLoading>true</enableLazyFieldLoading> <enableLazyFieldLoading>true</enableLazyFieldLoading>
@ -319,21 +317,26 @@
requests matching documents 10 through 19, and queryWindowSize is 50, requests matching documents 10 through 19, and queryWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. --> requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>50</queryResultWindowSize> <queryResultWindowSize>20</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the <!-- Maximum number of documents to cache for any entry in the
queryResultCache. --> queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached> <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- a newSearcher event is fired whenever a new searcher is being prepared <!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). --> and there is a current searcher handling requests (aka registered).
It can be used to prime certain caches to prevent long request times for
certain requests.
-->
<!-- QuerySenderListener takes an array of NamedList and executes a <!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. --> local query request for each NamedList in sequence. -->
<listener event="newSearcher" class="solr.QuerySenderListener"> <listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries"> <arr name="queries">
<!--
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst> <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
-->
</arr> </arr>
</listener> </listener>
@ -342,7 +345,7 @@
requests or to gain autowarming data from. --> requests or to gain autowarming data from. -->
<listener event="firstSearcher" class="solr.QuerySenderListener"> <listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries"> <arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst> <lst> <str name="q">solr rocks</str><str name="start">0</str><str name="rows">10</str></lst>
<lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst> <lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst>
</arr> </arr>
</listener> </listener>
@ -583,19 +586,20 @@
<lst name="spellchecker"> <lst name="spellchecker">
<str name="name">default</str> <str name="name">default</str>
<str name="field">spell</str> <str name="field">name</str>
<str name="spellcheckIndexDir">./spellchecker1</str> <str name="spellcheckIndexDir">./spellchecker</str>
</lst> </lst>
<!-- a spellchecker that uses a different distance measure
<lst name="spellchecker"> <lst name="spellchecker">
<str name="name">jarowinkler</str> <str name="name">jarowinkler</str>
<str name="field">spell</str> <str name="field">spell</str>
<!-- Use a different Distance Measure -->
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str> <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">./spellchecker2</str> <str name="spellcheckIndexDir">./spellchecker2</str>
</lst> </lst>
-->
<!-- a file based spell checker
<lst name="spellchecker"> <lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str> <str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">file</str> <str name="name">file</str>
@ -603,18 +607,21 @@
<str name="characterEncoding">UTF-8</str> <str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">./spellcheckerFile</str> <str name="spellcheckIndexDir">./spellcheckerFile</str>
</lst> </lst>
-->
</searchComponent> </searchComponent>
<!-- A request handler utilizing the spellcheck component. <!-- A request handler utilizing the spellcheck component.
################################################################################################ #############################################################################
NOTE: This is purely as an example. The whole purpose of the SpellCheckComponent is to hook it into NOTE: This is purely as an example. The whole purpose of the
the request handler that handles (i.e. the standard or dismax SearchHandler) SpellCheckComponent is to hook it into the request handler that handles (i.e.
queries such that a separate request is not needed to get suggestions. the standard or dismax SearchHandler) queries such that a separate request is
not needed to get suggestions.
IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
################################################################################################ WANT FOR YOUR PRODUCTION SYSTEM!
#############################################################################
--> -->
<requestHandler name="/spellCheckCompRH" class="solr.SearchHandler"> <requestHandler name="/spell" class="solr.SearchHandler" lazy="true">
<lst name="defaults"> <lst name="defaults">
<!-- omp = Only More Popular --> <!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str> <str name="spellcheck.onlyMorePopular">false</str>
@ -686,7 +693,6 @@
Note: Since solr1.1 requestHandlers requires a valid content type header if posted in Note: Since solr1.1 requestHandlers requires a valid content type header if posted in
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8' the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard error code. The response format differs from solr1.1 formatting and returns a standard error code.
To enable solr1.1 behavior, remove the /update handler or change its path To enable solr1.1 behavior, remove the /update handler or change its path
--> -->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />

View File

@ -41,6 +41,8 @@ public class TestSpellCheckResponse extends SolrExampleTestBase {
int port = 0; int port = 0;
static final String context = "/example"; static final String context = "/example";
static String field = "name";
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
@ -53,13 +55,13 @@ public class TestSpellCheckResponse extends SolrExampleTestBase {
public void testSpellCheckResponse() throws Exception { public void testSpellCheckResponse() throws Exception {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "AAA"); doc.setField("id", "111");
doc.setField("name", "Samsung"); doc.setField(field, "Samsung");
server.add(doc); server.add(doc);
server.commit(true, true); server.commit(true, true);
SolrQuery query = new SolrQuery("*:*"); SolrQuery query = new SolrQuery("*:*");
query.set(CommonParams.QT, "/spellCheckCompRH"); query.set(CommonParams.QT, "/spell");
query.set("spellcheck", true); query.set("spellcheck", true);
query.set(SpellingParams.SPELLCHECK_Q, "samsang"); query.set(SpellingParams.SPELLCHECK_Q, "samsang");
query.set(SpellingParams.SPELLCHECK_BUILD, true); query.set(SpellingParams.SPELLCHECK_BUILD, true);
@ -70,13 +72,13 @@ public class TestSpellCheckResponse extends SolrExampleTestBase {
public void testSpellCheckResponse_Extended() throws Exception { public void testSpellCheckResponse_Extended() throws Exception {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "AAA"); doc.setField("id", "111");
doc.setField("name", "Samsung"); doc.setField(field, "Samsung");
server.add(doc); server.add(doc);
server.commit(true, true); server.commit(true, true);
SolrQuery query = new SolrQuery("*:*"); SolrQuery query = new SolrQuery("*:*");
query.set(CommonParams.QT, "/spellCheckCompRH"); query.set(CommonParams.QT, "/spell");
query.set("spellcheck", true); query.set("spellcheck", true);
query.set(SpellingParams.SPELLCHECK_Q, "samsang"); query.set(SpellingParams.SPELLCHECK_Q, "samsang");
query.set(SpellingParams.SPELLCHECK_BUILD, true); query.set(SpellingParams.SPELLCHECK_BUILD, true);