SOLR-617 -- Allow configurable index deletion policy and provide a default implementation.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@699975 13f79535-47bb-0310-9956-ffa450edef68
Shalin Shekhar Mangar 2008-09-29 03:37:41 +00:00
parent ef87edc8ae
commit b34bc9cfcc
12 changed files with 1491 additions and 39 deletions

View File: CHANGES.txt

@@ -35,6 +35,12 @@ New Features
   be identical to solr 1.3. However, if you are using the .jar file, you can select
   which logging implementation to use by dropping a different binding.
   See: http://www.slf4j.org/ (ryan)

2. SOLR-617: Allow configurable index deletion policy and provide a default implementation which
   allows deletion of commit points on various criteria such as number of commits, age of commit
   point and optimized status.
   See http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html
   (yonik, Noble Paul, Akshay Ukey via shalin)

Optimizations

View File: example/solr/conf/solrconfig.xml

@@ -113,18 +113,46 @@
This is not needed if lock type is 'none' or 'single'
-->
<unlockOnStartup>false</unlockOnStartup>
<!--
Custom deletion policies can be specified here. The class must
implement org.apache.lucene.index.IndexDeletionPolicy.
http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html
The standard Solr IndexDeletionPolicy implementation supports deleting
index commit points on number of commits, age of commit point and
optimized status.
The latest commit point should always be preserved regardless
of the criteria.
-->
<deletionPolicy class="solr.SolrDeletionPolicy">
<!-- Keep only optimized commit points -->
<str name="keepOptimizedOnly">false</str>
<!-- The maximum number of commit points to be kept -->
<str name="maxCommitsToKeep">1</str>
<!--
Delete all commit points once they have reached the given age.
Supports DateMathParser syntax e.g.
<str name="maxCommitAge">30MINUTES</str>
<str name="maxCommitAge">1DAY</str>
-->
</deletionPolicy>
</mainIndex>
<!-- Enables JMX if and only if an existing MBeanServer is found, use
this if you want to configure JMX through JVM parameters. Remove
this to disable exposing Solr configuration and statistics to JMX.
If you want to connect to a particular server, specify the agentId
e.g. <jmx agentId="myAgent" />
If you want to start a new MBeanServer, specify the serviceUrl
e.g. <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" />
For more details see http://wiki.apache.org/solr/SolrJmx
-->
<jmx />
@@ -180,7 +208,7 @@
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
@@ -256,7 +284,7 @@
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>50</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
@@ -310,7 +338,7 @@
<requestDispatcher handleSelect="true" >
<!-- Make sure your system has some authentication before enabling remote streaming! -->
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
<!-- Set HTTP caching related parameters (for proxy caches and clients).
To get the behaviour of Solr 1.2 (ie: no caching related headers)
@@ -326,7 +354,7 @@
You can change it to lastModFrom="dirLastMod" if you want the
value to exactly correspond to when the physical index was last
modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
different even if the index has not changed (ie: when making
@@ -338,7 +366,7 @@
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header
if the value contains "max-age="
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
@@ -347,8 +375,8 @@
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the path or the qt (query type) param.
Names starting with a '/' are accessed with a path equal to the
@@ -361,7 +389,7 @@
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int>
<str name="fl">*</str>
<str name="version">2.1</str>
@@ -398,7 +426,7 @@
</str>
<int name="ps">100</int>
<str name="q.alt">*:*</str>
<!-- example highlighter config, enable per-query with hl=true -->
<str name="hl.fl">text features name</str>
<!-- for this field, we want no fragmenting, just highlighting -->
<str name="f.name.hl.fragsize">0</str>
@@ -462,7 +490,7 @@
<str name="facet.query">price:[500 TO *]</str>
</lst>
</requestHandler>
<!--
Search components are registered to SolrCore and used by Search Handlers
@@ -540,7 +568,7 @@
<str>spellcheck</str>
</arr>
</requestHandler>
<!-- a search component that enables you to configure the top results for
a given query regardless of the normal lucene scoring. -->
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
@@ -548,7 +576,7 @@
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<!-- a request handler utilizing the elevator component -->
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
@@ -558,14 +586,14 @@
<str>elevator</str>
</arr>
</requestHandler>
<!-- Update request handler.
Note: Since solr1.1, requestHandlers require a valid content type header if posted in
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard error code.
To enable solr1.1 behavior, remove the /update handler or change its path
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
@@ -575,7 +603,7 @@
for debugging and as a token server for other types of applications
-->
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
@@ -601,7 +629,7 @@
</requestHandler>
-->
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
<!-- ping/healthcheck -->
<requestHandler name="/admin/ping" class="PingRequestHandler">
<lst name="defaults">
@@ -610,7 +638,7 @@
<str name="echoParams">all</str>
</lst>
</requestHandler>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
@@ -618,7 +646,7 @@
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<highlighting>
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
@@ -634,12 +662,12 @@
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
@@ -648,8 +676,8 @@
</lst>
</formatter>
</highlighting>
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
writer.
@@ -673,7 +701,7 @@
-->
<queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!-- example of registering a query parser
@@ -683,11 +711,11 @@
<!-- example of registering a custom function parser
<valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" />
-->
<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
<!-- configure a healthcheck file for servers behind a loadbalancer
<healthcheck type="file">server-enabled</healthcheck>
-->
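
The <deletionPolicy> element above accepts any class that implements org.apache.lucene.index.IndexDeletionPolicy; if that class also implements NamedListInitializedPlugin, the nested <str> parameters are handed to its init(NamedList) method (this is the wiring done by SolrCore.initDeletionPolicy further down in this commit, and what the FakeDeletionPolicy test class relies on). A minimal sketch of a custom policy; the KeepAllDeletionPolicy class, the com.mycompany package, and the "note" parameter are illustrative, not part of this commit:

package com.mycompany;

import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;

import java.io.IOException;
import java.util.List;

/** A do-nothing policy that keeps every commit point (hypothetical example). */
public class KeepAllDeletionPolicy implements IndexDeletionPolicy, NamedListInitializedPlugin {
  private String note;

  public void init(NamedList args) {
    // receives e.g. <str name="note">...</str> nested inside <deletionPolicy>
    note = (String) args.get("note");
  }

  public void onInit(List commits) throws IOException {
    // called when the IndexWriter is first opened; delete nothing
  }

  public void onCommit(List commits) throws IOException {
    // called after each commit; delete nothing
  }

  public String getNote() {
    return note;
  }
}

It would be plugged in with <deletionPolicy class="com.mycompany.KeepAllDeletionPolicy"> in place of solr.SolrDeletionPolicy.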

View File: src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java

@@ -0,0 +1,167 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
* A wrapper for an IndexDeletionPolicy instance.
* <p/>
* Provides features for looking up IndexCommit given a version. Allows reserving index
* commit points for certain amounts of time to support features such as index replication
* or snapshooting directly out of a live index directory.
*
* @version $Id$
* @see org.apache.lucene.index.IndexDeletionPolicy
*/
public class IndexDeletionPolicyWrapper implements IndexDeletionPolicy {
private IndexDeletionPolicy deletionPolicy;
private Map<Long, IndexCommit> solrVersionVsCommits = new ConcurrentHashMap<Long, IndexCommit>();
private Map<Long, Long> reserves = new HashMap<Long, Long>();
private IndexCommit latestCommit;
public IndexDeletionPolicyWrapper(IndexDeletionPolicy deletionPolicy) {
this.deletionPolicy = deletionPolicy;
}
/**
* Gets the most recent commit point
* <p/>
* It is recommended to reserve a commit point for the duration of usage so that
* it is not deleted by the underlying deletion policy
*
* @return the most recent commit point
*/
public IndexCommit getLatestCommit() {
return latestCommit;
}
public IndexDeletionPolicy getWrappedDeletionPolicy() {
return deletionPolicy;
}
/**
* Set the duration for which commit point is to be reserved by the deletion policy.
*
* @param indexVersion version of the commit point to be reserved
* @param reserveTime time in milliseconds for which the commit point is to be reserved
*/
public void setReserveDuration(Long indexVersion, long reserveTime) {
synchronized (reserves) {
reserves.put(indexVersion, System.currentTimeMillis() + reserveTime);
List<Long> removeThese = new ArrayList<Long>();
for (Map.Entry<Long, Long> entry : reserves.entrySet()) {
if (entry.getValue() < System.currentTimeMillis()) removeThese.add(entry.getKey());
}
for (Long l : removeThese) reserves.remove(l);
}
}
private List<IndexCommitWrapper> wrap(List<IndexCommit> list) {
List<IndexCommitWrapper> result = new ArrayList<IndexCommitWrapper>();
for (IndexCommit indexCommit : list) result.add(new IndexCommitWrapper(indexCommit));
return result;
}
/**
* Internal use for Lucene... do not explicitly call.
*/
public void onInit(List list) throws IOException {
List<IndexCommitWrapper> wrapperList = wrap(list);
deletionPolicy.onInit(wrapperList);
updateCommitPoints(wrapperList);
}
/**
* Internal use for Lucene... do not explicitly call.
*/
public void onCommit(List list) throws IOException {
List<IndexCommitWrapper> wrapperList = wrap(list);
deletionPolicy.onCommit(wrapperList);
updateCommitPoints(wrapperList);
}
private class IndexCommitWrapper extends IndexCommit {
IndexCommit delegate;
IndexCommitWrapper(IndexCommit delegate) {
this.delegate = delegate;
}
public String getSegmentsFileName() {
return delegate.getSegmentsFileName();
}
public Collection getFileNames() throws IOException {
return delegate.getFileNames();
}
public Directory getDirectory() {
return delegate.getDirectory();
}
public void delete() {
Long reserve = reserves.get(delegate.getVersion());
if (reserve != null && System.currentTimeMillis() < reserve) return;
delegate.delete();
}
public boolean isOptimized() {
return delegate.isOptimized();
}
public boolean equals(Object o) {
return delegate.equals(o);
}
public int hashCode() {
return delegate.hashCode();
}
public long getVersion() {
return delegate.getVersion();
}
public long getGeneration() {
return delegate.getGeneration();
}
public boolean isDeleted() {
return delegate.isDeleted();
}
}
/**
* @param version the version of the commit point
* @return a commit point corresponding to the given version
*/
public IndexCommit getCommitPoint(Long version) {
return solrVersionVsCommits.get(version);
}
/**
* Gets the commit points for the index.
* This map instance may change between commits and commit points may be deleted.
* It is recommended to reserve a commit point for the duration of usage
*
* @return a Map of version to commit points
*/
public Map<Long, IndexCommit> getCommits() {
return solrVersionVsCommits;
}
private void updateCommitPoints(List<IndexCommitWrapper> list) {
Map<Long, IndexCommit> map = new ConcurrentHashMap<Long, IndexCommit>();
for (IndexCommitWrapper wrapper : list) {
if (!wrapper.isDeleted())
map.put(wrapper.getVersion(), wrapper.delegate);
}
solrVersionVsCommits = map;
latestCommit = ((list.get(list.size() - 1)).delegate);
}
}
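
A sketch of how replication or snapshooting code might use this wrapper, assuming a SolrCore reference is at hand; the SnapshotSketch class and the 10-second reservation window are illustrative, not part of this commit:

import org.apache.lucene.index.IndexCommit;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;

import java.io.IOException;

class SnapshotSketch {
  /** Reserve the newest commit point while its files are copied. */
  static void snapshot(SolrCore core) throws IOException {
    IndexDeletionPolicyWrapper delPolicy = core.getDeletionPolicy();
    IndexCommit commit = delPolicy.getLatestCommit();
    if (commit == null) return; // no commit has been seen yet

    // Pin this commit for 10 seconds so IndexCommitWrapper.delete()
    // becomes a no-op for it while its files are being copied.
    delPolicy.setReserveDuration(commit.getVersion(), 10 * 1000L);

    for (Object fileName : commit.getFileNames()) {
      // copy (String) fileName out of commit.getDirectory() here
    }
  }
}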

View File: src/java/org/apache/solr/core/SolrCore.java

@@ -17,6 +17,7 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanQuery;
@@ -47,6 +48,7 @@ import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.apache.solr.util.plugin.NamedListPluginLoader;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.w3c.dom.Node;
@@ -90,7 +92,8 @@ public final class SolrCore implements SolrInfoMBean {
private final Map<String,SearchComponent> searchComponents;
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
private final Map<String, SolrInfoMBean> infoRegistry;
private IndexDeletionPolicyWrapper solrDelPolicy;
public long getStartTime() { return startTime; }
/**
@@ -198,6 +201,17 @@ public final class SolrCore implements SolrInfoMBean {
return infoRegistry;
}
private void initDeletionPolicy() {
String className = solrConfig.get("mainIndex/deletionPolicy/@class", SolrDeletionPolicy.class.getName());
IndexDeletionPolicy delPolicy = createInstance(className, IndexDeletionPolicy.class, "Deletion Policy for SOLR");
Node node = (Node) solrConfig.evaluate("mainIndex/deletionPolicy", XPathConstants.NODE);
if (node != null) {
if (delPolicy instanceof NamedListInitializedPlugin)
((NamedListInitializedPlugin) delPolicy).init(DOMUtil.childNodesToNamedList(node));
}
solrDelPolicy = new IndexDeletionPolicyWrapper(delPolicy);
}
public List<SolrEventListener> parseListener(String path) {
List<SolrEventListener> lst = new ArrayList<SolrEventListener>();
@@ -430,7 +444,9 @@ public final class SolrCore implements SolrInfoMBean {
booleanQueryMaxClauseCount();
parseListeners();
initDeletionPolicy();
initIndex();
initWriters();
@@ -1452,6 +1468,9 @@ public final class SolrCore implements SolrInfoMBean {
return coreDescriptor;
}
public IndexDeletionPolicyWrapper getDeletionPolicy() {
return solrDelPolicy;
}
/////////////////////////////////////////////////////////////////////
// SolrInfoMBean stuff: Statistics and Module Info

View File: src/java/org/apache/solr/core/SolrDeletionPolicy.java

@@ -0,0 +1,177 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.DateField;
import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
/**
* Standard Solr deletion policy that allows reserving index commit points
* for certain amounts of time to support features such as index replication
* or snapshooting directly out of a live index directory.
*
* @version $Id$
* @see org.apache.lucene.index.IndexDeletionPolicy
*/
public class SolrDeletionPolicy implements IndexDeletionPolicy, NamedListInitializedPlugin {
public static Logger log = LoggerFactory.getLogger(SolrDeletionPolicy.class);
private boolean keepOptimizedOnly = false;
private String maxCommitAge = null;
private int maxCommitsToKeep = 1;
public void init(NamedList args) {
String keepOptimizedOnlyString = (String) args.get("keepOptimizedOnly");
String maxCommitsToKeepString = (String) args.get("maxCommitsToKeep");
String maxCommitAgeString = (String) args.get("maxCommitAge");
if (keepOptimizedOnlyString != null && keepOptimizedOnlyString.trim().length() > 0)
keepOptimizedOnly = Boolean.parseBoolean(keepOptimizedOnlyString);
if (maxCommitsToKeepString != null && maxCommitsToKeepString.trim().length() > 0)
maxCommitsToKeep = Integer.parseInt(maxCommitsToKeepString);
if (maxCommitAgeString != null && maxCommitAgeString.trim().length() > 0)
maxCommitAge = "-" + maxCommitAgeString;
}
static String str(IndexCommit commit) {
StringBuilder sb = new StringBuilder();
try {
sb.append("commit{");
Directory dir = commit.getDirectory();
if (dir instanceof FSDirectory) {
FSDirectory fsd = (FSDirectory) dir;
sb.append("dir=").append(fsd.getFile());
} else {
sb.append("dir=").append(dir);
}
sb.append(",segFN=").append(commit.getSegmentsFileName());
sb.append(",version=").append(commit.getVersion());
sb.append(",generation=").append(commit.getGeneration());
sb.append(",filenames=").append(commit.getFileNames());
} catch (Exception e) {
sb.append(e);
}
return sb.toString();
}
static String str(List commits) {
StringBuilder sb = new StringBuilder();
sb.append("num=").append(commits.size());
for (IndexCommit commit : (List<IndexCommit>) commits) {
sb.append("\n\t");
sb.append(str(commit));
}
return sb.toString();
}
/**
* Internal use for Lucene... do not explicitly call.
*/
public void onInit(List commits) throws IOException {
log.info("SolrDeletionPolicy.onInit: commits:" + str(commits));
updateCommits((List<IndexCommit>) commits);
}
/**
* Internal use for Lucene... do not explicitly call.
*/
public void onCommit(List commits) throws IOException {
log.info("SolrDeletionPolicy.onCommit: commits:" + str(commits));
updateCommits((List<IndexCommit>) commits);
}
private void updateCommits(List<IndexCommit> commits) {
// to be safe, we should only call delete on a commit point passed to us
// in this specific call (may be across diff IndexWriter instances).
// this will happen rarely, so just synchronize everything
// for safety and to avoid race conditions
DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US);
synchronized (this) {
IndexCommit last = commits.get(commits.size() - 1);
log.info("last commit = " + last.getVersion());
int numCommitsToDelete = commits.size() - maxCommitsToKeep;
int i = 0;
for (IndexCommit commit : commits) {
// don't delete the last commit point
if (commit == last) {
continue;
}
if (i < numCommitsToDelete) {
commit.delete();
i++;
continue;
}
try {
//TODO: replace LHS of if condition with commit.getTimestamp()
if (maxCommitAge != null)
if (commit.getDirectory().fileModified(commit.getSegmentsFileName()) < dmp.parseMath(maxCommitAge).getTime()) {
commit.delete();
continue;
}
} catch (Exception e) {
log.warn("Exception while checking commit point's age for deletion", e);
}
if (keepOptimizedOnly) {
if (!commit.isOptimized()) {
commit.delete();
log.info("Marking unoptimized index " + getId(commit) + " for deletion.");
}
}
}
} // end synchronized
}
private String getId(IndexCommit commit) {
StringBuilder sb = new StringBuilder();
Directory dir = commit.getDirectory();
// For anything persistent, make something that will
// be the same, regardless of the Directory instance.
if (dir instanceof FSDirectory) {
FSDirectory fsd = (FSDirectory) dir;
File fdir = fsd.getFile();
sb.append(fdir.getPath());
} else {
sb.append(dir);
}
sb.append('/');
sb.append(commit.getGeneration());
sb.append('_');
sb.append(commit.getVersion());
return sb.toString();
}
public boolean isKeepOptimizedOnly() {
return keepOptimizedOnly;
}
public String getMaxCommitAge() {
return maxCommitAge;
}
public int getMaxCommitsToKeep() {
return maxCommitsToKeep;
}
}
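
To make the maxCommitAge handling concrete: init() stores the configured value with a leading "-", so updateCommits() can ask DateMathParser for a cutoff Date in the past. A small sketch of that computation, assuming a configured value of 30MINUTES (the MaxCommitAgeSketch class is illustrative, not part of this commit):

import org.apache.solr.schema.DateField;
import org.apache.solr.util.DateMathParser;

import java.util.Date;
import java.util.Locale;

class MaxCommitAgeSketch {
  public static void main(String[] args) throws Exception {
    // A configured maxCommitAge of 30MINUTES is stored as "-30MINUTES"
    // by init(), so parseMath() yields a Date 30 minutes in the past.
    DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US);
    Date cutoff = dmp.parseMath("-30MINUTES");

    // updateCommits() then deletes a commit point when
    // commit.getDirectory().fileModified(commit.getSegmentsFileName())
    //     < cutoff.getTime()
    System.out.println("delete commit points older than: " + cutoff);
  }
}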

View File: src/java/org/apache/solr/update/SolrIndexWriter.java

@@ -17,10 +17,7 @@
package org.apache.solr.update;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.MergePolicy;
- import org.apache.lucene.index.MergeScheduler;
- import org.apache.lucene.index.LogMergePolicy;
+ import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.IndexSchema;
@@ -118,6 +115,12 @@ public class SolrIndexWriter extends IndexWriter {
init(name, schema, config);
}
public SolrIndexWriter(String name, String path, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy) throws IOException {
super(getDirectory(path, config), schema.getAnalyzer(), create, delPolicy, new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
init(name, schema, config);
}
/**
* use DocumentBuilder now...
* private final void addField(Document doc, String name, String val) {

View File: src/java/org/apache/solr/update/UpdateHandler.java

@@ -120,7 +120,7 @@ public abstract class UpdateHandler implements SolrInfoMBean {
}
protected SolrIndexWriter createMainIndexWriter(String name, boolean removeAllExisting) throws IOException {
- return new SolrIndexWriter(name,core.getIndexDir(), removeAllExisting, schema, core.getSolrConfig().mainIndexConfig);
+ return new SolrIndexWriter(name,core.getIndexDir(), removeAllExisting, schema, core.getSolrConfig().mainIndexConfig, core.getDeletionPolicy());
}
protected final Term idTerm(String readableId) {

View File: src/test/org/apache/solr/core/FakeDeletionPolicy.java

@@ -0,0 +1,41 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import java.io.IOException;
import java.util.List;
/**
* @version $Id$
*/
public class FakeDeletionPolicy implements IndexDeletionPolicy, NamedListInitializedPlugin {
private String var1;
private String var2;
//@Override
public void init(NamedList args) {
var1 = (String) args.get("var1");
var2 = (String) args.get("var2");
}
public String getVar1() {
return var1;
}
public String getVar2() {
return var2;
}
// @Override
public void onCommit(List arg0) throws IOException {
System.setProperty("onCommit", "test.org.apache.solr.core.FakeDeletionPolicy.onCommit");
}
// @Override
public void onInit(List arg0) throws IOException {
System.setProperty("onInit", "test.org.apache.solr.core.FakeDeletionPolicy.onInit");
}
}

View File: src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java

@@ -0,0 +1,112 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexCommit;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.Test;
import java.util.Map;
/**
* @version $Id$
*/
public class TestSolrDeletionPolicy1 extends AbstractSolrTestCase {
@Override
public String getSchemaFile() {
return "schema.xml";
}
@Override
public String getSolrConfigFile() {
return "solrconfig-delpolicy1.xml";
}
private void addDocs() {
assertU(adoc("id", String.valueOf(1),
"name", "name" + String.valueOf(1)));
assertU(commit());
assertQ("return all docs",
req("id:[0 TO 1]"),
"*[count(//doc)=1]"
);
assertU(adoc("id", String.valueOf(2),
"name", "name" + String.valueOf(2)));
assertU(commit());
assertQ("return all docs",
req("id:[0 TO 2]"),
"*[count(//doc)=2]"
);
assertU(adoc("id", String.valueOf(3),
"name", "name" + String.valueOf(3)));
assertU(optimize());
assertQ("return all docs",
req("id:[0 TO 3]"),
"*[count(//doc)=3]"
);
assertU(adoc("id", String.valueOf(4),
"name", "name" + String.valueOf(4)));
assertU(optimize());
assertQ("return all docs",
req("id:[0 TO 4]"),
"*[count(//doc)=4]"
);
assertU(adoc("id", String.valueOf(5),
"name", "name" + String.valueOf(5)));
assertU(optimize());
assertQ("return all docs",
req("id:[0 TO 5]"),
"*[count(//doc)=5]"
);
}
@Test
public void testKeepOptimizedOnlyCommits() {
IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();
IndexCommit latest = delPolicy.getLatestCommit();
for (Long version : commits.keySet()) {
if (commits.get(version) == latest)
continue;
assertTrue(commits.get(version).isOptimized());
}
}
@Test
public void testNumCommitsConfigured() {
IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();
assertTrue(commits.size() == ((SolrDeletionPolicy) (delPolicy.getWrappedDeletionPolicy())).getMaxCommitsToKeep());
}
@Test
public void testCommitAge() throws InterruptedException {
IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();
IndexCommit ic = delPolicy.getLatestCommit();
String agestr = ((SolrDeletionPolicy) (delPolicy.getWrappedDeletionPolicy())).getMaxCommitAge().replaceAll("[a-zA-Z]", "").replaceAll("-", "");
long age = Long.parseLong(agestr) * 1000;
Thread.sleep(age);
assertU(adoc("id", String.valueOf(6),
"name", "name" + String.valueOf(6)));
assertU(optimize());
assertQ("return all docs",
req("id:[0 TO 6]"),
"*[count(//doc)=6]"
);
commits = delPolicy.getCommits();
assertTrue(!commits.containsKey(ic.getVersion()));
}
}

View File: src/test/org/apache/solr/core/TestSolrDeletionPolicy2.java

@@ -0,0 +1,50 @@
package org.apache.solr.core;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.Test;
/**
* @version $Id$
*/
public class TestSolrDeletionPolicy2 extends AbstractSolrTestCase {
@Override
public String getSchemaFile() {
return "schema.xml";
}
@Override
public String getSolrConfigFile() {
return "solrconfig-delpolicy2.xml";
}
@Test
public void testFakeDeletionPolicyClass() {
IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
assertTrue(delPolicy.getWrappedDeletionPolicy() instanceof FakeDeletionPolicy);
FakeDeletionPolicy f = (FakeDeletionPolicy) delPolicy.getWrappedDeletionPolicy();
assertTrue("value1".equals(f.getVar1()));
assertTrue("value2".equals(f.getVar2()));
assertU(adoc("id", String.valueOf(1),
"name", "name" + String.valueOf(1)));
assertTrue(System.getProperty("onInit").equals("test.org.apache.solr.core.FakeDeletionPolicy.onInit"));
assertU(commit());
assertQ("return all docs",
req("id:[0 TO 1]"),
"*[count(//doc)=1]"
);
assertTrue(System.getProperty("onCommit").equals("test.org.apache.solr.core.FakeDeletionPolicy.onCommit"));
System.clearProperty("onInit");
System.clearProperty("onCommit");
}
}

View File: src/test/test-files/solr/conf/solrconfig-delpolicy1.xml

@@ -0,0 +1,426 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- $Id$
$Source$
$Name$
-->
<config>
<jmx />
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<dataDir>${solr.data.dir:./solr/data}</dataDir>
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<!-- Tell Lucene when to flush documents to disk.
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM.
If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
-->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!--
Expert: Turn on Lucene's auto commit capability.
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
-->
<luceneAutoCommit>false</luceneAutoCommit>
<!--
Expert:
The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when
to merge based on number of documents
Other implementations of MergePolicy must have a no-argument constructor
-->
<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>
<!--
Expert:
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not.
-->
<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>
<!-- these are global... can't currently override per index -->
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- lucene options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<unlockOnStartup>true</unlockOnStartup>
<deletionPolicy class="solr.SolrDeletionPolicy">
<str name="keepOptimizedOnly">true</str>
<str name="maxCommitsToKeep">3</str>
<str name="maxCommitAge">5SECONDS</str>
</deletionPolicy>
</mainIndex>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<queryResultCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="1024"/>
<documentCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
<cache name="myUserCache"
class="solr.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<queryResultWindowSize>10</queryResultWindowSize>
<!-- set maxSize artificially low to exercise both types of sets -->
<HashDocSet maxSize="3" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler">
<bool name="httpCaching">true</bool>
</requestHandler>
<requestHandler name="dismaxOldStyleDefaults"
class="solr.DisMaxRequestHandler" >
<!-- for historic reasons, DisMaxRequestHandler will use all of
its init params as "defaults" if there is no "defaults" list
specified
-->
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</requestHandler>
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
<lst name="defaults">
<str name="q.alt">*:*</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</lst>
</requestHandler>
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
<requestHandler name="oldagain" class="solr.tst.OldRequestHandler" >
<lst name="lst1"> <str name="op">sqrt</str> <int name="val">2</int> </lst>
<lst name="lst2"> <str name="op">log</str> <float name="val">10</float> </lst>
</requestHandler>
<requestHandler name="test" class="solr.tst.TestRequestHandler" />
<!-- test query parameter defaults -->
<requestHandler name="defaults" class="solr.StandardRequestHandler">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<!-- test query parameter defaults -->
<requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
<bool name="httpCaching">false</bool>
</requestHandler>
<!-- test elevation -->
<!--searchComponent name="elevate" class="org.apache.solr.handler.component.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent-->
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<!--arr name="last-components">
<str>elevate</str>
</arr-->
</requestHandler>
<searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
<str name="queryAnalyzerFieldType">lowerfilt</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">true</str>
</lst>
<!-- Example of using different distance measure -->
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">lowerfilt</str>
<!-- Use a different Distance Measure -->
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">spellchecker2</str>
</lst>
<lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">external</str>
<str name="sourceLocation">spellings.txt</str>
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
</searchComponent>
<!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
-->
<queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/>
<requestHandler name="spellCheckCompRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
</highlighting>
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr" never304="false">
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
</requestDispatcher>
<admin>
<defaultQuery>solr</defaultQuery>
<!--gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles-->
</admin>
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
<queryParser name="foo" class="FooQParserPlugin"/>
</config>

View File: src/test/test-files/solr/conf/solrconfig-delpolicy2.xml

@@ -0,0 +1,423 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- $Id$
$Source$
$Name$
-->
<config>
<jmx />
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<dataDir>${solr.data.dir:./solr/data}</dataDir>
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<!-- Tell Lucene when to flush documents to disk.
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM.
If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
-->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!--
Expert: Turn on Lucene's auto commit capability.
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
-->
<luceneAutoCommit>false</luceneAutoCommit>
<!--
Expert:
The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when
to merge based on number of documents
Other implementations of MergePolicy must have a no-argument constructor
-->
<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>
<!--
Expert:
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not.
-->
<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>
<!-- these are global... can't currently override per index -->
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- lucene options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<unlockOnStartup>true</unlockOnStartup>
<deletionPolicy class="org.apache.solr.core.FakeDeletionPolicy">
<str name="var1">value1</str>
<str name="var2">value2</str>
</deletionPolicy>
</mainIndex>
<updateHandler class="solr.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>3600000</maxTime>
</autoCommit>
-->
<!-- represents a lower bound on the frequency that commits may
occur (in seconds). NOTE: not yet implemented
<commitIntervalLowerBound>0</commitIntervalLowerBound>
-->
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<queryResultCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="1024"/>
<documentCache
class="solr.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!--
<cache name="myUserCache"
class="solr.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<queryResultWindowSize>10</queryResultWindowSize>
<!-- set maxSize artificially low to exercise both types of sets -->
<HashDocSet maxSize="3" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler">
<bool name="httpCaching">true</bool>
</requestHandler>
<requestHandler name="dismaxOldStyleDefaults"
class="solr.DisMaxRequestHandler" >
<!-- for historic reasons, DisMaxRequestHandler will use all of
its init params as "defaults" if there is no "defaults" list
specified
-->
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</requestHandler>
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
<lst name="defaults">
<str name="q.alt">*:*</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</lst>
</requestHandler>
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
<requestHandler name="oldagain" class="solr.tst.OldRequestHandler" >
<lst name="lst1"> <str name="op">sqrt</str> <int name="val">2</int> </lst>
<lst name="lst2"> <str name="op">log</str> <float name="val">10</float> </lst>
</requestHandler>
<requestHandler name="test" class="solr.tst.TestRequestHandler" />
<!-- test query parameter defaults -->
<requestHandler name="defaults" class="solr.StandardRequestHandler">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<!-- test query parameter defaults -->
<requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
<lst name="defaults">
<int name="rows">4</int>
<bool name="hl">true</bool>
<str name="hl.fl">text,name,subject,title,whitetok</str>
</lst>
</requestHandler>
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
<bool name="httpCaching">false</bool>
</requestHandler>
<!-- test elevation -->
<!--searchComponent name="elevate" class="org.apache.solr.handler.component.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent-->
<requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<!--arr name="last-components">
<str>elevate</str>
</arr-->
</requestHandler>
<searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent">
<str name="queryAnalyzerFieldType">lowerfilt</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">true</str>
</lst>
<!-- Example of using different distance measure -->
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">lowerfilt</str>
<!-- Use a different Distance Measure -->
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">spellchecker2</str>
</lst>
<lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">external</str>
<str name="sourceLocation">spellings.txt</str>
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
</searchComponent>
<!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
-->
<queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/>
<requestHandler name="spellCheckCompRH" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
</highlighting>
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr" never304="false">
<cacheControl>max-age=30, public</cacheControl>
</httpCaching>
</requestDispatcher>
<admin>
<defaultQuery>solr</defaultQuery>
<!--gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles-->
</admin>
<!-- test getting system property -->
<propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
<queryParser name="foo" class="FooQParserPlugin"/>
</config>