mirror of https://github.com/apache/lucene.git
SOLR-5353: Enhance CoreAdmin api to split a route key's documents from an index and leave behind all other documents
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1533163 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8b47e1c575
commit
ea81f1add6
|
@ -97,6 +97,9 @@ New Features
|
|||
|
||||
* SOLR-5338: Split shards by a route key using split.key parameter. (shalin)
|
||||
|
||||
* SOLR-5353: Enhance CoreAdmin api to split a route key's documents from an index
|
||||
and leave behind all other documents. (shalin)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -249,6 +249,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
}
|
||||
}
|
||||
}
|
||||
String splitKey = params.get("split.key");
|
||||
String[] newCoreNames = params.getParams("targetCore");
|
||||
String cname = params.get(CoreAdminParams.CORE, "");
|
||||
|
||||
|
@ -300,7 +301,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
}
|
||||
|
||||
|
||||
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router, routeFieldName);
|
||||
SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router, routeFieldName, splitKey);
|
||||
core.getUpdateHandler().split(cmd);
|
||||
|
||||
// After the split has completed, someone (here?) should start the process of replaying the buffered updates.
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.solr.common.cloud.CompositeIdRouter;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
import org.apache.solr.common.cloud.HashBasedRouter;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -58,6 +59,7 @@ public class SolrIndexSplitter {
|
|||
int numPieces;
|
||||
int currPartition = 0;
|
||||
String routeFieldName;
|
||||
String splitKey;
|
||||
|
||||
public SolrIndexSplitter(SplitIndexCommand cmd) {
|
||||
searcher = cmd.getReq().getSearcher();
|
||||
|
@ -79,6 +81,9 @@ public class SolrIndexSplitter {
|
|||
} else {
|
||||
field = searcher.getSchema().getField(routeFieldName);
|
||||
}
|
||||
if (cmd.splitKey != null) {
|
||||
splitKey = getRouteKey(cmd.splitKey);
|
||||
}
|
||||
}
|
||||
|
||||
public void split() throws IOException {
|
||||
|
@ -174,11 +179,20 @@ public class SolrIndexSplitter {
|
|||
idRef = field.getType().indexedToReadable(term, idRef);
|
||||
String idString = idRef.toString();
|
||||
|
||||
if (splitKey != null) {
|
||||
// todo have composite routers support these kind of things instead
|
||||
String part1 = getRouteKey(idString);
|
||||
if (part1 == null)
|
||||
continue;
|
||||
if (!splitKey.equals(part1)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
int hash = 0;
|
||||
if (hashRouter != null) {
|
||||
hash = hashRouter.sliceHash(idString, null, null, null);
|
||||
}
|
||||
// int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
|
||||
|
||||
docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
|
||||
for (;;) {
|
||||
|
@ -200,6 +214,22 @@ public class SolrIndexSplitter {
|
|||
return docSets;
|
||||
}
|
||||
|
||||
private String getRouteKey(String idString) {
|
||||
int idx = idString.indexOf(CompositeIdRouter.separator);
|
||||
if (idx <= 0) return null;
|
||||
String part1 = idString.substring(0, idx);
|
||||
int commaIdx = part1.indexOf(CompositeIdRouter.bitsSeparator);
|
||||
if (commaIdx > 0) {
|
||||
if (commaIdx + 1 < part1.length()) {
|
||||
char ch = part1.charAt(commaIdx + 1);
|
||||
if (ch >= '0' && ch <= '9') {
|
||||
part1 = part1.substring(0, commaIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
return part1;
|
||||
}
|
||||
|
||||
|
||||
// change livedocs on the reader to delete those docs we don't want
|
||||
static class LiveDocsReader extends FilterAtomicReader {
|
||||
|
|
|
@ -36,14 +36,16 @@ public class SplitIndexCommand extends UpdateCommand {
|
|||
public List<DocRouter.Range> ranges;
|
||||
public DocRouter router;
|
||||
public String routeFieldName;
|
||||
public String splitKey;
|
||||
|
||||
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router, String routeFieldName) {
|
||||
public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router, String routeFieldName, String splitKey) {
|
||||
super(req);
|
||||
this.paths = paths;
|
||||
this.cores = cores;
|
||||
this.ranges = ranges;
|
||||
this.router = router;
|
||||
this.routeFieldName = routeFieldName;
|
||||
this.splitKey = splitKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -58,6 +60,12 @@ public class SplitIndexCommand extends UpdateCommand {
|
|||
sb.append(",cores=" + cores);
|
||||
sb.append(",ranges=" + ranges);
|
||||
sb.append(",router=" + router);
|
||||
if (routeFieldName != null) {
|
||||
sb.append(",routeFieldName=" + routeFieldName);
|
||||
}
|
||||
if (splitKey != null) {
|
||||
sb.append(",split.key=" + splitKey);
|
||||
}
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.common.cloud.CompositeIdRouter;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
import org.apache.solr.common.cloud.PlainIdRouter;
|
||||
import org.apache.solr.common.util.Hash;
|
||||
|
@ -96,7 +97,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null);
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null, null);
|
||||
new SolrIndexSplitter(command).split();
|
||||
|
||||
Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
|
||||
|
@ -141,7 +142,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null);
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null, null);
|
||||
new SolrIndexSplitter(command).split();
|
||||
|
||||
Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
|
||||
|
@ -198,7 +199,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
try {
|
||||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter(), null);
|
||||
SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter(), null, null);
|
||||
new SolrIndexSplitter(command).split();
|
||||
} finally {
|
||||
if (request != null) request.close();
|
||||
|
@ -235,7 +236,7 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
request = lrf.makeRequest("q", "dummy");
|
||||
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter(), null);
|
||||
Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter(), null, null);
|
||||
new SolrIndexSplitter(command).split();
|
||||
|
||||
directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
|
||||
|
@ -266,6 +267,63 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitByRouteKey() throws Exception {
|
||||
File indexDir = new File(TEMP_DIR, this.getClass().getName() + "testSplitByRouteKey");
|
||||
if (indexDir.exists()) {
|
||||
FileUtils.deleteDirectory(indexDir);
|
||||
}
|
||||
indexDir.mkdirs();
|
||||
|
||||
CompositeIdRouter r1 = new CompositeIdRouter();
|
||||
String splitKey = "sea-line!";
|
||||
String key2 = "soul-raising!";
|
||||
|
||||
// murmur2 has a collision on the above two keys
|
||||
assertEquals(r1.keyHashRange(splitKey), r1.keyHashRange(key2));
|
||||
|
||||
/*
|
||||
More strings with collisions on murmur2 for future reference:
|
||||
"Drava" "dessert spoon"
|
||||
"Bighorn" "pleasure lover"
|
||||
"attributable to" "second edition"
|
||||
"sea-line" "soul-raising"
|
||||
"lift direction" "testimony meeting"
|
||||
*/
|
||||
|
||||
for (int i=0; i<10; i++) {
|
||||
assertU(adoc("id", splitKey + i));
|
||||
assertU(adoc("id", key2 + i));
|
||||
}
|
||||
assertU(commit());
|
||||
assertJQ(req("q", "*:*"), "/response/numFound==20");
|
||||
|
||||
DocRouter.Range splitKeyRange = r1.keyHashRange(splitKey);
|
||||
|
||||
LocalSolrQueryRequest request = null;
|
||||
Directory directory = null;
|
||||
try {
|
||||
request = lrf.makeRequest("q", "dummy");
|
||||
SplitIndexCommand command = new SplitIndexCommand(request,
|
||||
Lists.newArrayList(indexDir.getAbsolutePath()), null, Lists.newArrayList(splitKeyRange), new CompositeIdRouter(), null, splitKey);
|
||||
new SolrIndexSplitter(command).split();
|
||||
directory = h.getCore().getDirectoryFactory().get(indexDir.getAbsolutePath(),
|
||||
DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
|
||||
DirectoryReader reader = DirectoryReader.open(directory);
|
||||
assertEquals("split index has wrong number of documents", 10, reader.numDocs());
|
||||
reader.close();
|
||||
h.getCore().getDirectoryFactory().release(directory);
|
||||
directory = null;
|
||||
} finally {
|
||||
if (request != null) {
|
||||
request.close();
|
||||
}
|
||||
if (directory != null) {
|
||||
h.getCore().getDirectoryFactory().release(directory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<DocRouter.Range> getRanges(String id1, String id2) throws UnsupportedEncodingException {
|
||||
// find minHash/maxHash hash ranges
|
||||
byte[] bytes = id1.getBytes("UTF-8");
|
||||
|
|
|
@ -34,10 +34,10 @@ import java.util.List;
|
|||
public class CompositeIdRouter extends HashBasedRouter {
|
||||
public static final String NAME = "compositeId";
|
||||
|
||||
private static final int separator = '!';
|
||||
public static final int separator = '!';
|
||||
|
||||
// separator used to optionally specify number of bits to allocate toward first part.
|
||||
private static final int bitsSeparator = '/';
|
||||
public static final int bitsSeparator = '/';
|
||||
private int bits = 16;
|
||||
private int mask1 = 0xffff0000;
|
||||
private int mask2 = 0x0000ffff;
|
||||
|
|
Loading…
Reference in New Issue