mirror of https://github.com/apache/lucene.git
SOLR-12454: Tweak Overseer leadership transition related logging for easier troubleshooting.
This commit is contained in:
parent
f7aed28760
commit
e9c6effd07
|
@ -185,6 +185,8 @@ Other Changes
|
|||
|
||||
* SOLR-12464: Reduce Overseer.close() logging (for non-Overseer leaders) (Christine Poerschke)
|
||||
|
||||
* SOLR-12454: Tweak Overseer leadership transition related logging for easier troubleshooting. (Christine Poerschke)
|
||||
|
||||
================== 7.4.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -132,7 +132,7 @@ public class Overseer implements SolrCloseable {
|
|||
isLeader = amILeader(); // not a no, not a yes, try ask again
|
||||
}
|
||||
|
||||
log.debug("Starting to work on the main queue");
|
||||
log.info("Starting to work on the main queue : {}", LeaderElector.getNodeName(myId));
|
||||
try {
|
||||
ZkStateWriter zkStateWriter = null;
|
||||
ClusterState clusterState = null;
|
||||
|
@ -302,12 +302,12 @@ public class Overseer implements SolrCloseable {
|
|||
return;//shutting down no need to go further
|
||||
}
|
||||
org.apache.zookeeper.data.Stat stat = new org.apache.zookeeper.data.Stat();
|
||||
String path = OVERSEER_ELECT + "/leader";
|
||||
final String path = OVERSEER_ELECT + "/leader";
|
||||
byte[] data;
|
||||
try {
|
||||
data = zkClient.getData(path, null, stat, true);
|
||||
} catch (Exception e) {
|
||||
log.error("could not read the data" ,e);
|
||||
log.error("could not read the "+path+" data" ,e);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
|
@ -315,16 +315,17 @@ public class Overseer implements SolrCloseable {
|
|||
String id = (String) m.get(ID);
|
||||
if(overseerCollectionConfigSetProcessor.getId().equals(id)){
|
||||
try {
|
||||
log.warn("I'm exiting, but I'm still the leader");
|
||||
log.warn("I (id={}) am exiting, but I'm still the leader",
|
||||
overseerCollectionConfigSetProcessor.getId());
|
||||
zkClient.delete(path,stat.getVersion(),true);
|
||||
} catch (KeeperException.BadVersionException e) {
|
||||
//no problem ignore it some other Overseer has already taken over
|
||||
} catch (Exception e) {
|
||||
log.error("Could not delete my leader node ", e);
|
||||
log.error("Could not delete my leader node "+path, e);
|
||||
}
|
||||
|
||||
} else{
|
||||
log.debug("somebody else has already taken up the overseer position");
|
||||
log.info("somebody else (id={}) has already taken up the overseer position", id);
|
||||
}
|
||||
} finally {
|
||||
//if I am not shutting down, Then I need to rejoin election
|
||||
|
@ -413,10 +414,12 @@ public class Overseer implements SolrCloseable {
|
|||
private LeaderStatus amILeader() {
|
||||
Timer.Context timerContext = stats.time("am_i_leader");
|
||||
boolean success = true;
|
||||
String propsId = null;
|
||||
try {
|
||||
ZkNodeProps props = ZkNodeProps.load(zkClient.getData(
|
||||
OVERSEER_ELECT + "/leader", null, null, true));
|
||||
if (myId.equals(props.getStr(ID))) {
|
||||
propsId = props.getStr(ID);
|
||||
if (myId.equals(propsId)) {
|
||||
return LeaderStatus.YES;
|
||||
}
|
||||
} catch (KeeperException e) {
|
||||
|
@ -426,6 +429,8 @@ public class Overseer implements SolrCloseable {
|
|||
return LeaderStatus.DONT_KNOW;
|
||||
} else if (e.code() != KeeperException.Code.SESSIONEXPIRED) {
|
||||
log.warn("", e);
|
||||
} else {
|
||||
log.debug("", e);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
success = false;
|
||||
|
@ -438,7 +443,7 @@ public class Overseer implements SolrCloseable {
|
|||
stats.error("am_i_leader");
|
||||
}
|
||||
}
|
||||
log.info("According to ZK I (id=" + myId + ") am no longer a leader.");
|
||||
log.info("According to ZK I (id={}) am no longer a leader. propsId={}", myId, propsId);
|
||||
return LeaderStatus.NO;
|
||||
}
|
||||
|
||||
|
|
|
@ -388,10 +388,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
|
|||
String statsName = "collection_am_i_leader";
|
||||
Timer.Context timerContext = stats.time(statsName);
|
||||
boolean success = true;
|
||||
String propsId = null;
|
||||
try {
|
||||
ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(
|
||||
Overseer.OVERSEER_ELECT + "/leader", null, null, true));
|
||||
if (myId.equals(props.getStr(ID))) {
|
||||
propsId = props.getStr(ID);
|
||||
if (myId.equals(propsId)) {
|
||||
return LeaderStatus.YES;
|
||||
}
|
||||
} catch (KeeperException e) {
|
||||
|
@ -401,6 +403,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
|
|||
return LeaderStatus.DONT_KNOW;
|
||||
} else if (e.code() != KeeperException.Code.SESSIONEXPIRED) {
|
||||
log.warn("", e);
|
||||
} else {
|
||||
log.debug("", e);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
success = false;
|
||||
|
@ -413,7 +417,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
|
|||
stats.error(statsName);
|
||||
}
|
||||
}
|
||||
log.info("According to ZK I (id=" + myId + ") am no longer a leader.");
|
||||
log.info("According to ZK I (id={}) am no longer a leader. propsId={}", myId, propsId);
|
||||
return LeaderStatus.NO;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue