SOLR-11011: Assign.buildCoreName can lead to error in creating a new core when legacyCloud=false

This commit is contained in:
Cao Manh Dat 2017-07-20 11:49:56 +07:00
parent 82d29e3a09
commit 227eeefcd0
10 changed files with 194 additions and 100 deletions

View File

@ -71,6 +71,8 @@ Bug Fixes
* SOLR-11012: Fix three (JavaBinCodec not being closed) Resource Leak warnings. (Christine Poerschke)
* SOLR-11011: Assign.buildCoreName can lead to error in creating a new core when legacyCloud=false (Cao Manh Dat)
Optimizations
----------------------

View File

@ -73,6 +73,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
String node = message.getStr(CoreAdminParams.NODE);
String shard = message.getStr(SHARD_ID_PROP);
String coreName = message.getStr(CoreAdminParams.NAME);
String coreNodeName = message.getStr(CoreAdminParams.CORE_NODE_NAME);
Replica.Type replicaType = Replica.Type.valueOf(message.getStr(ZkStateReader.REPLICA_TYPE, Replica.Type.NRT.name()).toUpperCase(Locale.ROOT));
boolean parallel = message.getBool("parallel", false);
if (StringUtils.isBlank(coreName)) {
@ -103,7 +104,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Node: " + node + " is not live");
}
if (coreName == null) {
coreName = Assign.buildCoreName(coll, shard, replicaType);
coreName = Assign.buildCoreName(ocmh.zkStateReader.getZkClient(), coll.getName(), shard, replicaType);
} else if (!skipCreateReplicaInClusterState) {
//Validate that the core name is unique in that collection
for (Slice slice : coll.getSlices()) {
@ -130,6 +131,9 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(node),
ZkStateReader.NODE_NAME_PROP, node,
ZkStateReader.REPLICA_TYPE, replicaType.name());
if (coreNodeName != null) {
props = props.plus(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
}
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
}
params.set(CoreAdminParams.CORE_NODE_NAME,

View File

@ -28,8 +28,6 @@ import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableMap;
@ -46,12 +44,16 @@ import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ReplicaPosition;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.util.NumberUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import static java.util.Collections.singletonMap;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY;
@ -60,31 +62,58 @@ import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE;
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE_DEFAULT;
import static org.apache.solr.common.cloud.DocCollection.SNITCH;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH;
public class Assign {
private static Pattern COUNT = Pattern.compile("core_node(\\d+)");
public static String assignNode(DocCollection collection) {
Map<String, Slice> sliceMap = collection != null ? collection.getSlicesMap() : null;
if (sliceMap == null) {
return "core_node1";
}
int max = 0;
for (Slice slice : sliceMap.values()) {
for (Replica replica : slice.getReplicas()) {
Matcher m = COUNT.matcher(replica.getName());
if (m.matches()) {
max = Math.max(max, Integer.parseInt(m.group(1)));
public static int incAndGetId(SolrZkClient zkClient, String collection) {
String path = "/collections/"+collection;
try {
if (!zkClient.exists(path, true)) {
try {
zkClient.makePath(path, true);
} catch (KeeperException.NodeExistsException e) {
// it's okay if another beats us creating the node
}
}
path += "/counter";
if (!zkClient.exists(path, true)) {
try {
zkClient.create(path, NumberUtils.intToBytes(0), CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException e) {
// it's okay if another beats us creating the node
}
}
} catch (InterruptedException e) {
Thread.interrupted();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating counter node in Zookeeper for collection:" + collection, e);
} catch (KeeperException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating counter node in Zookeeper for collection:" + collection, e);
}
return "core_node" + (max + 1);
while (true) {
Stat stat = new Stat();
try {
byte[] data = zkClient.getData(path, null, stat, true);
int currentId = NumberUtils.bytesToInt(data);
data = NumberUtils.intToBytes(++currentId);
zkClient.setData(path, data, stat.getVersion(), true);
return currentId;
} catch (KeeperException e) {
if (e.code() != KeeperException.Code.BADVERSION) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:"+collection, e);
}
} catch (InterruptedException e) {
Thread.interrupted();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:" + collection, e);
}
}
}
public static String assignNode(SolrZkClient client, String collection) {
return "core_node" + incAndGetId(client, collection);
}
/**
@ -136,21 +165,9 @@ public class Assign {
return String.format(Locale.ROOT, "%s_%s_replica_%s%s", collectionName, shard, type.name().substring(0,1).toLowerCase(Locale.ROOT), replicaNum);
}
public static String buildCoreName(DocCollection collection, String shard, Replica.Type type) {
Slice slice = collection.getSlice(shard);
int replicaNum = slice.getReplicas().size();
for (; ; ) {
String replicaName = buildCoreName(collection.getName(), shard, type, replicaNum);
boolean exists = false;
for (Replica replica : slice.getReplicas()) {
if (replicaName.equals(replica.getStr(CORE_NAME_PROP))) {
exists = true;
break;
}
}
if (exists) replicaNum++;
else return replicaName;
}
public static String buildCoreName(SolrZkClient zkClient, String collection, String shard, Replica.Type type) {
int replicaNum = incAndGetId(zkClient, collection);
return buildCoreName(collection, shard, type, replicaNum);
}
public static List<String> getLiveOrLiveAndCreateNodeSetList(final Set<String> liveNodes, final ZkNodeProps message, final Random random) {
// TODO: add smarter options that look at the current number of cores per

View File

@ -178,7 +178,7 @@ public class MoveReplicaCmd implements Cmd{
private void moveNormalReplica(ClusterState clusterState, NamedList results, String targetNode, String async,
DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
String newCoreName = Assign.buildCoreName(coll, slice.getName(), replica.getType());
String newCoreName = Assign.buildCoreName(ocmh.zkStateReader.getZkClient(), coll.getName(), slice.getName(), replica.getType());
ZkNodeProps addReplicasProps = new ZkNodeProps(
COLLECTION_PROP, coll.getName(),
SHARD_ID_PROP, slice.getName(),

View File

@ -240,7 +240,7 @@ public class ReplicaMutator {
log.debug("node=" + coreNodeName + " is already registered");
} else {
// if coreNodeName is null, auto assign one
coreNodeName = Assign.assignNode(collection);
coreNodeName = Assign.assignNode(zkStateReader.getZkClient(), collection.getName());
}
message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP,
coreNodeName);

View File

@ -66,7 +66,12 @@ public class SliceMutator {
log.error("Invalid Collection/Slice {}/{} ", coll, slice);
return ZkStateWriter.NO_OP;
}
String coreNodeName = Assign.assignNode(collection);
String coreNodeName;
if (message.getStr(ZkStateReader.CORE_NODE_NAME_PROP) != null) {
coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
} else {
coreNodeName = Assign.assignNode(zkStateReader.getZkClient(), collection.getName());
}
Replica replica = new Replica(coreNodeName,
makeMap(
ZkStateReader.CORE_NAME_PROP, message.getStr(ZkStateReader.CORE_NAME_PROP),

View File

@ -198,4 +198,24 @@ public class NumberUtils {
// TODO: operate directly on BytesRef
return SortableStr2long(sval.utf8ToString(), offset, len);
}
public static byte[] intToBytes(int val) {
byte[] result = new byte[4];
result[0] = (byte) (val >> 24);
result[1] = (byte) (val >> 16);
result[2] = (byte) (val >> 8);
result[3] = (byte) val;
return result;
}
public static int bytesToInt(byte[] bytes) {
if (bytes == null) return 0;
int val = 0;
for (int i = 0; i < bytes.length; i++) {
val = val << 8;
val += bytes[i];
}
return val;
}
}

View File

@ -16,24 +16,29 @@
*/
package org.apache.solr.cloud;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.util.ExecutorUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class AssignTest extends SolrTestCaseJ4 {
@Override
@ -51,42 +56,69 @@ public class AssignTest extends SolrTestCaseJ4 {
@Test
public void testAssignNode() throws Exception {
String cname = "collection1";
Map<String,DocCollection> collectionStates = new HashMap<>();
Map<String,Slice> slices = new HashMap<>();
Map<String,Replica> replicas = new HashMap<>();
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state",
ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString(),
ZkStateReader.BASE_URL_PROP, "0.0.0.0",
ZkStateReader.CORE_NAME_PROP, "core1",
ZkStateReader.ROLES_PROP, null,
ZkStateReader.NODE_NAME_PROP, "0_0_0_0",
ZkStateReader.SHARD_ID_PROP, "shard1",
ZkStateReader.COLLECTION_PROP, cname,
ZkStateReader.NUM_SHARDS_PROP, "1",
ZkStateReader.CORE_NODE_NAME_PROP, "core_node1");
Replica replica = new Replica("core_node1" , m.getProperties());
replicas.put("core_node1", replica);
Slice slice = new Slice("slice1", replicas , new HashMap<String,Object>(0));
slices.put("slice1", slice);
DocRouter router = new ImplicitDocRouter();
DocCollection docCollection = new DocCollection(cname, slices, new HashMap<String,Object>(0), router);
collectionStates.put(cname, docCollection);
Set<String> liveNodes = new HashSet<>();
ClusterState state = new ClusterState(-1,liveNodes, collectionStates);
String nodeName = Assign.assignNode(state.getCollection("collection1"));
SolrZkClient zkClient = mock(SolrZkClient.class);
Map<String, byte[]> zkClientData = new HashMap<>();
when(zkClient.setData(anyString(), any(), anyInt(), anyBoolean())).then(invocation -> {
zkClientData.put(invocation.getArgument(0), invocation.getArgument(1));
return null;
}
);
when(zkClient.getData(anyString(), any(), any(), anyBoolean())).then(invocation ->
zkClientData.get(invocation.getArgument(0)));
String nodeName = Assign.assignNode(zkClient, "collection1");
assertEquals("core_node1", nodeName);
nodeName = Assign.assignNode(zkClient, "collection2");
assertEquals("core_node1", nodeName);
nodeName = Assign.assignNode(zkClient, "collection1");
assertEquals("core_node2", nodeName);
}
@Test
public void testIdIsUnique() throws Exception {
String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
ZkTestServer server = new ZkTestServer(zkDir);
Object fixedValue = new Object();
String[] collections = new String[]{"c1","c2","c3","c4","c5","c6","c7","c8","c9"};
Map<String, ConcurrentHashMap<Integer, Object>> collectionUniqueIds = new HashMap<>();
for (String c : collections) {
collectionUniqueIds.put(c, new ConcurrentHashMap<>());
}
ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool("threadpool");
try {
server.run();
try (SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), 10000)) {
assertTrue(zkClient.isConnected());
zkClient.makePath("/", true);
for (String c : collections) {
zkClient.makePath("/collections/"+c, true);
}
List<Future<?>> futures = new ArrayList<>();
for (int i = 0; i < 1000; i++) {
futures.add(executor.submit(() -> {
String collection = collections[random().nextInt(collections.length)];
int id = Assign.incAndGetId(zkClient, collection);
Object val = collectionUniqueIds.get(collection).put(id, fixedValue);
if (val != null) {
fail("ZkController do not generate unique id for " + collection);
}
}));
}
for (Future<?> future : futures) {
future.get();
}
}
assertEquals(1000, (long) collectionUniqueIds.values().stream()
.map(ConcurrentHashMap::size)
.reduce((m1, m2) -> m1 + m2).get());
} finally {
server.shutdown();
ExecutorUtil.shutdownAndAwaitTermination(executor);
}
}
@Test
public void testBuildCoreName() {
assertEquals("Core name pattern changed", "collection1_shard1_replica_n1", Assign.buildCoreName("collection1", "shard1", Replica.Type.NRT, 1));

View File

@ -34,6 +34,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@ -603,24 +604,20 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
= new ArrayList<>(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes());
Collections.shuffle(nodeList, random());
String newReplicaName = Assign.assignNode(getCollectionState(collectionName));
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
.setNode(nodeList.get(0))
.process(cluster.getSolrClient());
Replica newReplica = getCollectionState(collectionName).getReplica(newReplicaName);
Replica newReplica = grabNewReplica(response, getCollectionState(collectionName));
assertEquals("Replica should be created on the right node",
cluster.getSolrClient().getZkStateReader().getBaseUrlForNodeName(nodeList.get(0)),
newReplica.getStr(ZkStateReader.BASE_URL_PROP));
newReplicaName = Assign.assignNode(getCollectionState(collectionName));
Path instancePath = createTempDir();
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
.withProperty(CoreAdminParams.INSTANCE_DIR, instancePath.toString())
.process(cluster.getSolrClient());
newReplica = getCollectionState(collectionName).getReplica(newReplicaName);
newReplica = grabNewReplica(response, getCollectionState(collectionName));
assertNotNull(newReplica);
try (HttpSolrClient coreclient = getHttpSolrClient(newReplica.getStr(ZkStateReader.BASE_URL_PROP))) {
@ -647,14 +644,24 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
// Check that specifying property.name works. DO NOT remove this when the "name" property is deprecated
// for ADDREPLICA, this is "property.name". See SOLR-7132
newReplicaName = Assign.assignNode(getCollectionState(collectionName));
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
.withProperty(CoreAdminParams.NAME, "propertyDotName")
.process(cluster.getSolrClient());
newReplica = getCollectionState(collectionName).getReplica(newReplicaName);
newReplica = grabNewReplica(response, getCollectionState(collectionName));
assertEquals("'core' should be 'propertyDotName' ", "propertyDotName", newReplica.getStr("core"));
}
private Replica grabNewReplica(CollectionAdminResponse response, DocCollection docCollection) {
String replicaName = response.getCollectionCoresStatus().keySet().iterator().next();
Optional<Replica> optional = docCollection.getReplicas().stream()
.filter(replica -> replicaName.equals(replica.getCoreName()))
.findAny();
if (optional.isPresent()) {
return optional.get();
}
throw new AssertionError("Can not find " + replicaName + " from " + docCollection);
}
}

View File

@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -267,7 +268,6 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
.process(cluster.getSolrClient());
String newReplicaName = Assign.assignNode(getCollectionState(collectionName));
ArrayList<String> nodeList
= new ArrayList<>(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes());
Collections.shuffle(nodeList, random());
@ -276,25 +276,32 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
.setNode(node)
.process(cluster.getSolrClient());
Replica newReplica = grabNewReplica(response, getCollectionState(collectionName));
assertEquals(0, response.getStatus());
assertTrue(response.isSuccess());
waitForState("Expected to see replica " + newReplicaName + " on node " + node, collectionName, (n, c) -> {
Replica r = c.getSlice("shard1").getReplica(newReplicaName);
return r != null && r.getNodeName().equals(node);
});
assertTrue(newReplica.getNodeName().equals(node));
// Test DELETEREPLICA
response = CollectionAdminRequest.deleteReplica(collectionName, "shard1", newReplicaName)
response = CollectionAdminRequest.deleteReplica(collectionName, "shard1", newReplica.getName())
.process(cluster.getSolrClient());
assertEquals(0, response.getStatus());
waitForState("Expected replica " + newReplicaName + " to vanish from cluster state", collectionName,
(n, c) -> c.getSlice("shard1").getReplica(newReplicaName) == null);
waitForState("Expected replica " + newReplica.getName() + " to vanish from cluster state", collectionName,
(n, c) -> c.getSlice("shard1").getReplica(newReplica.getName()) == null);
}
private Replica grabNewReplica(CollectionAdminResponse response, DocCollection docCollection) {
String replicaName = response.getCollectionCoresStatus().keySet().iterator().next();
Optional<Replica> optional = docCollection.getReplicas().stream()
.filter(replica -> replicaName.equals(replica.getCoreName()))
.findAny();
if (optional.isPresent()) {
return optional.get();
}
throw new AssertionError("Can not find " + replicaName + " from " + docCollection);
}
@Test
public void testClusterProp() throws InterruptedException, IOException, SolrServerException {