SOLR-6511: keep track of the node that created the leader-initiated recovery znode, helpful for debugging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1629966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Timothy Potter 2014-10-07 19:35:12 +00:00
parent 4244b5f82d
commit 511996c5ba
1 changed files with 32 additions and 15 deletions

View File

@ -33,6 +33,7 @@ import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -83,6 +84,8 @@ import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.SessionExpiredException;
import org.apache.zookeeper.data.Stat;
import org.noggit.JSONParser;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -1952,16 +1955,19 @@ public final class ZkController {
}
public String getLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName) {
Map<String,Object> stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
return (stateObj != null) ? (String)stateObj.get("state") : null;
}
public Map<String,Object> getLeaderInitiatedRecoveryStateObject(String collection, String shardId, String coreNodeName) {
if (collection == null || shardId == null || coreNodeName == null)
return null; // if we don't have complete data about a core in cloud mode, return null
String znodePath = getLeaderInitiatedRecoveryZnodePath(collection, shardId, coreNodeName);
String state = null;
byte[] stateData = null;
try {
byte[] data = zkClient.getData(znodePath, null, new Stat(), false);
if (data != null && data.length > 0)
state = new String(data, "UTF-8");
stateData = zkClient.getData(znodePath, null, new Stat(), false);
} catch (NoNodeException ignoreMe) {
// safe to ignore as this znode will only exist if the leader initiated recovery
} catch (ConnectionLossException cle) {
@ -1972,8 +1978,6 @@ public final class ZkController {
// sort of safe to ignore ??? Usually these are seen when the core is going down
// or there are bigger issues to deal with than reading this znode
log.warn("Unable to read "+znodePath+" due to: "+see);
} catch (UnsupportedEncodingException e) {
throw new Error("JVM Does not seem to support UTF-8", e);
} catch (Exception exc) {
log.error("Failed to read data from znode "+znodePath+" due to: "+exc);
if (exc instanceof SolrException) {
@ -1983,7 +1987,12 @@ public final class ZkController {
"Failed to read data from znodePath: "+znodePath, exc);
}
}
return state;
Map<String,Object> stateObj = null;
if (stateData != null && stateData.length > 0)
stateObj = (Map<String, Object>) ZkStateReader.fromJSON(stateData);
return stateObj;
}
private void updateLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName, String state) {
@ -2005,13 +2014,21 @@ public final class ZkController {
return;
}
byte[] znodeData = null;
Map<String,Object> stateObj = null;
try {
znodeData = state.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new Error("JVM Does not seem to support UTF-8", e);
stateObj = getLeaderInitiatedRecoveryStateObject(collection, shardId, coreNodeName);
} catch (Exception exc) {
log.warn(exc.getMessage(), exc);
}
if (stateObj == null)
stateObj = new LinkedHashMap<String,Object>();
stateObj.put("state", state);
// only update the createdBy value if its not set
if (stateObj.get("createdByNodeName") == null)
stateObj.put("createdByNodeName", String.valueOf(this.nodeName));
byte[] znodeData = ZkStateReader.toJSON(stateObj);
boolean retryOnConnLoss = true; // be a little more robust when trying to write data
try {
if (zkClient.exists(znodePath, retryOnConnLoss)) {