mirror of https://github.com/apache/lucene.git
SOLR-4798: use correct router during index splitting
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1480383 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e19e272d62
commit
72065147d7
|
@ -99,6 +99,13 @@ Bug Fixes
|
|||
* SOLR-4795: Sub shard leader should not accept any updates from parent after
|
||||
it goes active (shalin)
|
||||
|
||||
* SOLR-4798: shard splitting does not respect the router for the collection
|
||||
when executing the index split. One effect of this is that documents
|
||||
may be placed in the wrong shard when the default compositeId router
|
||||
is used in conjunction with IDs containing "!". (yonik)
|
||||
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -252,6 +252,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
List<String> paths = null;
|
||||
int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
|
||||
|
||||
DocRouter router = null;
|
||||
if (coreContainer.isZooKeeperAware()) {
|
||||
ClusterState clusterState = coreContainer.getZkController().getClusterState();
|
||||
String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
|
||||
|
@ -259,8 +260,8 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
|
||||
Slice slice = clusterState.getSlice(collectionName, sliceName);
|
||||
DocRouter.Range currentRange = slice.getRange();
|
||||
DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
|
||||
ranges = currentRange != null ? hp.partitionRange(partitions, currentRange) : null;
|
||||
router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
|
||||
ranges = currentRange != null ? router.partitionRange(partitions, currentRange) : null;
|
||||
}
|
||||
|
||||
if (pathsArr == null) {
|
||||
|
@ -278,7 +279,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
|
||||
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges);
|
||||
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router);
|
||||
core.getUpdateHandler().split(cmd);
|
||||
|
||||
// After the split has completed, someone (here?) should start the process of replaying the buffered updates.
|
||||
|
|
|
@ -32,9 +32,11 @@ import org.apache.lucene.util.CharsRef;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
import org.apache.solr.common.cloud.HashBasedRouter;
|
||||
import org.apache.solr.common.util.Hash;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.StrField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -53,6 +55,8 @@ public class SolrIndexSplitter {
|
|||
DocRouter.Range[] rangesArr; // same as ranges list, but an array for extra speed in inner loops
|
||||
List<String> paths;
|
||||
List<SolrCore> cores;
|
||||
DocRouter router;
|
||||
HashBasedRouter hashRouter;
|
||||
int numPieces;
|
||||
int currPartition = 0;
|
||||
|
||||
|
@ -62,6 +66,9 @@ public class SolrIndexSplitter {
|
|||
ranges = cmd.ranges;
|
||||
paths = cmd.paths;
|
||||
cores = cmd.cores;
|
||||
router = cmd.router;
|
||||
hashRouter = router instanceof HashBasedRouter ? (HashBasedRouter)router : null;
|
||||
|
||||
if (ranges == null) {
|
||||
numPieces = paths != null ? paths.size() : cores.size();
|
||||
} else {
|
||||
|
@ -151,16 +158,24 @@ public class SolrIndexSplitter {
|
|||
BytesRef term = null;
|
||||
DocsEnum docsEnum = null;
|
||||
|
||||
CharsRef idRef = new CharsRef(100);
|
||||
for (;;) {
|
||||
term = termsEnum.next();
|
||||
if (term == null) break;
|
||||
|
||||
// figure out the hash for the term
|
||||
// TODO: hook in custom hashes (or store hashes)
|
||||
// TODO: performance implications of using indexedToReadable?
|
||||
CharsRef ref = new CharsRef(term.length);
|
||||
ref = field.getType().indexedToReadable(term, ref);
|
||||
int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
|
||||
|
||||
// FUTURE: if conversion to strings costs too much, we could
|
||||
// specialize and use the hash function that can work over bytes.
|
||||
idRef = field.getType().indexedToReadable(term, idRef);
|
||||
String idString = idRef.toString();
|
||||
|
||||
int hash = 0;
|
||||
if (hashRouter != null) {
|
||||
hash = hashRouter.sliceHash(idString, null, null);
|
||||
}
|
||||
// int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
|
||||
|
||||
docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
|
||||
for (;;) {
|
||||
int doc = docsEnum.nextDoc();
|
||||
|
|
|
@ -34,13 +34,14 @@ public class SplitIndexCommand extends UpdateCommand {
|
|||
public List<String> paths;
|
||||
public List<SolrCore> cores; // either paths or cores should be specified
|
||||
public List<DocRouter.Range> ranges;
|
||||
// TODO: allow specification of custom hash function
|
||||
public DocRouter router;
|
||||
|
||||
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges) {
|
||||
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router) {
|
||||
super(req);
|
||||
this.paths = paths;
|
||||
this.cores = cores;
|
||||
this.ranges = ranges;
|
||||
this.router = router;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -54,6 +55,7 @@ public class SplitIndexCommand extends UpdateCommand {
|
|||
sb.append(",paths=" + paths);
|
||||
sb.append(",cores=" + cores);
|
||||
sb.append(",ranges=" + ranges);
|
||||
sb.append(",router=" + router);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges);
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter());
|
||||
new SolrIndexSplitter(command).split();
|
||||
|
||||
Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
|
||||
|
@ -148,7 +148,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
try {
|
||||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges);
|
||||
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter());
|
||||
new SolrIndexSplitter(command).split();
|
||||
} finally {
|
||||
if (request != null) request.close();
|
||||
|
@ -185,7 +185,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null);
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter());
|
||||
new SolrIndexSplitter(command).split();
|
||||
|
||||
directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
|
||||
|
|
|
@ -36,7 +36,7 @@ public class CompositeIdRouter extends HashBasedRouter {
|
|||
private int separator = '!';
|
||||
|
||||
// separator used to optionally specify number of bits to allocate toward first part.
|
||||
private int bitsSepartor = '/';
|
||||
private int bitsSeparator = '/';
|
||||
private int bits = 16;
|
||||
private int mask1 = 0xffff0000;
|
||||
private int mask2 = 0x0000ffff;
|
||||
|
@ -59,7 +59,7 @@ public class CompositeIdRouter extends HashBasedRouter {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
|
||||
public int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
|
||||
int idx = id.indexOf(separator);
|
||||
if (idx < 0) {
|
||||
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
|
||||
|
@ -69,7 +69,7 @@ public class CompositeIdRouter extends HashBasedRouter {
|
|||
int m2 = mask2;
|
||||
|
||||
String part1 = id.substring(0,idx);
|
||||
int commaIdx = part1.indexOf(bitsSepartor);
|
||||
int commaIdx = part1.indexOf(bitsSeparator);
|
||||
if (commaIdx > 0) {
|
||||
int firstBits = getBits(part1, commaIdx);
|
||||
if (firstBits >= 0) {
|
||||
|
@ -105,7 +105,7 @@ public class CompositeIdRouter extends HashBasedRouter {
|
|||
int m2 = mask2;
|
||||
|
||||
String part1 = id.substring(0,idx);
|
||||
int bitsSepIdx = part1.indexOf(bitsSepartor);
|
||||
int bitsSepIdx = part1.indexOf(bitsSeparator);
|
||||
if (bitsSepIdx > 0) {
|
||||
int firstBits = getBits(part1, bitsSepIdx);
|
||||
if (firstBits >= 0) {
|
||||
|
|
|
@ -42,7 +42,7 @@ public abstract class HashBasedRouter extends DocRouter {
|
|||
return range != null && range.includes(hash);
|
||||
}
|
||||
|
||||
protected int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
|
||||
public int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
|
||||
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue