SOLR-4798: use correct router during index splitting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1480383 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2013-05-08 18:10:56 +00:00
parent e19e272d62
commit 72065147d7
7 changed files with 43 additions and 18 deletions

View File

@ -99,6 +99,13 @@ Bug Fixes
* SOLR-4795: Sub shard leader should not accept any updates from parent after
it goes active (shalin)
* SOLR-4798: shard splitting does not respect the router for the collection
when executing the index split. One effect of this is that documents
may be placed in the wrong shard when the default compositeId router
is used in conjunction with IDs containing "!". (yonik)
Other Changes
----------------------

View File

@ -252,6 +252,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
List<String> paths = null;
int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
DocRouter router = null;
if (coreContainer.isZooKeeperAware()) {
ClusterState clusterState = coreContainer.getZkController().getClusterState();
String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
@ -259,8 +260,8 @@ public class CoreAdminHandler extends RequestHandlerBase {
String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
Slice slice = clusterState.getSlice(collectionName, sliceName);
DocRouter.Range currentRange = slice.getRange();
DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
ranges = currentRange != null ? hp.partitionRange(partitions, currentRange) : null;
router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
ranges = currentRange != null ? router.partitionRange(partitions, currentRange) : null;
}
if (pathsArr == null) {
@ -278,7 +279,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
}
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges);
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router);
core.getUpdateHandler().split(cmd);
// After the split has completed, someone (here?) should start the process of replaying the buffered updates.

View File

@ -32,9 +32,11 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.HashBasedRouter;
import org.apache.solr.common.util.Hash;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
@ -53,6 +55,8 @@ public class SolrIndexSplitter {
DocRouter.Range[] rangesArr; // same as ranges list, but an array for extra speed in inner loops
List<String> paths;
List<SolrCore> cores;
DocRouter router;
HashBasedRouter hashRouter;
int numPieces;
int currPartition = 0;
@ -62,6 +66,9 @@ public class SolrIndexSplitter {
ranges = cmd.ranges;
paths = cmd.paths;
cores = cmd.cores;
router = cmd.router;
hashRouter = router instanceof HashBasedRouter ? (HashBasedRouter)router : null;
if (ranges == null) {
numPieces = paths != null ? paths.size() : cores.size();
} else {
@ -151,16 +158,24 @@ public class SolrIndexSplitter {
BytesRef term = null;
DocsEnum docsEnum = null;
CharsRef idRef = new CharsRef(100);
for (;;) {
term = termsEnum.next();
if (term == null) break;
// figure out the hash for the term
// TODO: hook in custom hashes (or store hashes)
// TODO: performance implications of using indexedToReadable?
CharsRef ref = new CharsRef(term.length);
ref = field.getType().indexedToReadable(term, ref);
int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
// FUTURE: if conversion to strings costs too much, we could
// specialize and use the hash function that can work over bytes.
idRef = field.getType().indexedToReadable(term, idRef);
String idString = idRef.toString();
int hash = 0;
if (hashRouter != null) {
hash = hashRouter.sliceHash(idString, null, null);
}
// int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
for (;;) {
int doc = docsEnum.nextDoc();

View File

@ -34,13 +34,14 @@ public class SplitIndexCommand extends UpdateCommand {
public List<String> paths;
public List<SolrCore> cores; // either paths or cores should be specified
public List<DocRouter.Range> ranges;
// TODO: allow specification of custom hash function
public DocRouter router;
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges) {
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router) {
super(req);
this.paths = paths;
this.cores = cores;
this.ranges = ranges;
this.router = router;
}
@Override
@ -54,6 +55,7 @@ public class SplitIndexCommand extends UpdateCommand {
sb.append(",paths=" + paths);
sb.append(",cores=" + cores);
sb.append(",ranges=" + ranges);
sb.append(",router=" + router);
sb.append('}');
return sb.toString();
}

View File

@ -95,7 +95,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
request = lrf.makeRequest("q", "dummy");
SplitIndexCommand command = new SplitIndexCommand(request,
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges);
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter());
new SolrIndexSplitter(command).split();
Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
@ -148,7 +148,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
try {
request = lrf.makeRequest("q", "dummy");
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges);
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter());
new SolrIndexSplitter(command).split();
} finally {
if (request != null) request.close();
@ -185,7 +185,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
request = lrf.makeRequest("q", "dummy");
SplitIndexCommand command = new SplitIndexCommand(request,
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null);
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter());
new SolrIndexSplitter(command).split();
directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),

View File

@ -36,7 +36,7 @@ public class CompositeIdRouter extends HashBasedRouter {
private int separator = '!';
// separator used to optionally specify number of bits to allocate toward first part.
private int bitsSepartor = '/';
private int bitsSeparator = '/';
private int bits = 16;
private int mask1 = 0xffff0000;
private int mask2 = 0x0000ffff;
@ -59,7 +59,7 @@ public class CompositeIdRouter extends HashBasedRouter {
}
@Override
protected int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
public int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
int idx = id.indexOf(separator);
if (idx < 0) {
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
@ -69,7 +69,7 @@ public class CompositeIdRouter extends HashBasedRouter {
int m2 = mask2;
String part1 = id.substring(0,idx);
int commaIdx = part1.indexOf(bitsSepartor);
int commaIdx = part1.indexOf(bitsSeparator);
if (commaIdx > 0) {
int firstBits = getBits(part1, commaIdx);
if (firstBits >= 0) {
@ -105,7 +105,7 @@ public class CompositeIdRouter extends HashBasedRouter {
int m2 = mask2;
String part1 = id.substring(0,idx);
int bitsSepIdx = part1.indexOf(bitsSepartor);
int bitsSepIdx = part1.indexOf(bitsSeparator);
if (bitsSepIdx > 0) {
int firstBits = getBits(part1, bitsSepIdx);
if (firstBits >= 0) {

View File

@ -42,7 +42,7 @@ public abstract class HashBasedRouter extends DocRouter {
return range != null && range.includes(hash);
}
protected int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
public int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
}