SOLR-6820: Make the number of version buckets used by the UpdateLog configurable as increasing beyond the default 256 has been shown to help with high volume indexing performance in SolrCloud

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680586 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Timothy Potter 2015-05-20 14:51:44 +00:00
parent 2183e67cfd
commit 645f15bae0
5 changed files with 44 additions and 6 deletions

View File

@ -406,6 +406,11 @@ Other Changes
ZkCmdExecutor#ensureExists as they were doing the same thing. Also ZkCmdExecutor#ensureExists now respects the
CreateMode passed to it. (Varun Thacker)
* SOLR-6820: Make the number of version buckets used by the UpdateLog configurable as
increasing beyond the default 256 has been shown to help with high volume indexing
performance in SolrCloud; helps overcome a limitation where Lucene uses the request
thread to perform expensive index housekeeping work. (Mark Miller, yonik, Timothy Potter)
================== 5.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -144,6 +144,7 @@ public class UpdateLog implements PluginInfoInitialized {
protected final int numDeletesByQueryToKeep = 100;
protected int numRecordsToKeep;
protected int maxNumLogsToKeep;
protected int numVersionBuckets; // This should only be used to initialize VersionInfo... the actual number of buckets may be rounded up to a power of two.
// keep track of deletes only... this is not updated on an add
protected LinkedHashMap<BytesRef, LogPtr> oldDeletes = new LinkedHashMap<BytesRef, LogPtr>(numDeletesToKeep) {
@ -224,6 +225,10 @@ public class UpdateLog implements PluginInfoInitialized {
return maxNumLogsToKeep;
}
/**
 * Returns the number of version buckets this UpdateLog was configured with.
 *
 * <p>This is the raw configured value; as noted on the backing field, the
 * actual number of buckets may be rounded up to a power of two.
 *
 * @return the configured number of version buckets
 */
public int getNumVersionBuckets() {
  return this.numVersionBuckets;
}
protected static int objToInt(Object obj, int def) {
if (obj != null) {
return Integer.parseInt(obj.toString());
@ -238,9 +243,13 @@ public class UpdateLog implements PluginInfoInitialized {
numRecordsToKeep = objToInt(info.initArgs.get("numRecordsToKeep"), 100);
maxNumLogsToKeep = objToInt(info.initArgs.get("maxNumLogsToKeep"), 10);
numVersionBuckets = objToInt(info.initArgs.get("numVersionBuckets"), 256);
if (numVersionBuckets <= 0)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Number of version buckets must be greater than 0!");
log.info("Initializing UpdateLog: dataDir={} defaultSyncLevel={} numRecordsToKeep={} maxNumLogsToKeep={}",
dataDir, defaultSyncLevel, numRecordsToKeep, maxNumLogsToKeep);
log.info("Initializing UpdateLog: dataDir={} defaultSyncLevel={} numRecordsToKeep={} maxNumLogsToKeep={} numVersionBuckets={}",
dataDir, defaultSyncLevel, numRecordsToKeep, maxNumLogsToKeep, numVersionBuckets);
}
/* Note, when this is called, uhandler is not completely constructed.
@ -292,7 +301,7 @@ public class UpdateLog implements PluginInfoInitialized {
}
try {
versionInfo = new VersionInfo(this, 256);
versionInfo = new VersionInfo(this, numVersionBuckets);
} catch (SolrException e) {
log.error("Unable to use updateLog: " + e.getMessage(), e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,

View File

@ -147,9 +147,17 @@
uncommitted changes to the index, so use of a hard autoCommit
is recommended (see below).
"dir" - the target directory for transaction logs, defaults to the
solr data directory. -->
solr data directory.
"numVersionBuckets" - sets the number of buckets used to keep
track of max version values when checking for re-ordered
updates; increase this value to reduce the cost of
synchronizing access to version buckets during high-volume
indexing, this requires 8 bytes (long) * numVersionBuckets
of heap space per Solr core.
-->
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
<int name="numVersionBuckets">${solr.ulog.numVersionBuckets:256}</int>
</updateLog>
<!-- AutoCommit

View File

@ -324,9 +324,17 @@
uncommitted changes to the index, so use of a hard autoCommit
is recommended (see below).
"dir" - the target directory for transaction logs, defaults to the
solr data directory. -->
solr data directory.
"numVersionBuckets" - sets the number of buckets used to keep
track of max version values when checking for re-ordered
updates; increase this value to reduce the cost of
synchronizing access to version buckets during high-volume
indexing, this requires 8 bytes (long) * numVersionBuckets
of heap space per Solr core.
-->
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
<int name="numVersionBuckets">${solr.ulog.numVersionBuckets:256}</int>
</updateLog>
<!-- AutoCommit

View File

@ -327,9 +327,17 @@
uncommitted changes to the index, so use of a hard autoCommit
is recommended (see below).
"dir" - the target directory for transaction logs, defaults to the
solr data directory. -->
solr data directory.
"numVersionBuckets" - sets the number of buckets used to keep
track of max version values when checking for re-ordered
updates; increase this value to reduce the cost of
synchronizing access to version buckets during high-volume
indexing, this requires 8 bytes (long) * numVersionBuckets
of heap space per Solr core.
-->
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
<int name="numVersionBuckets">${solr.ulog.numVersionBuckets:256}</int>
</updateLog>
<!-- AutoCommit