SOLR-13229: Cleanup replicasMetTragicEvent after all exceptions

This commit is contained in:
Tomas Fernandez Lobbe 2019-02-06 16:43:47 -08:00 committed by Tomas Fernandez Lobbe
parent 4a513fa99f
commit 8ac34c2d6d
3 changed files with 19 additions and 12 deletions

View File

@ -59,6 +59,8 @@ Bug Fixes
* SOLR-12330: 500 error code on json.facet syntax errors (Munendra S N, Mikhail Khludnev)
* SOLR-13229: Cleanup replicasMetTragicEvent after all types of exception (Tomás Fernández Löbbe)
Improvements
----------------------
* SOLR-12999: Index replication could delete segments before downloading segments from master if there is not enough

View File

@ -628,7 +628,14 @@ public class ZkController implements Closeable {
assert ObjectReleaseTracker.release(this);
}
/**
* Best effort to give up the leadership of a shard in a core after hitting a tragic exception
* @param cd The current core descriptor
* @param tragicException The tragic exception from the {@code IndexWriter}
*/
public void giveupLeadership(CoreDescriptor cd, Throwable tragicException) {
assert tragicException != null;
assert cd != null;
DocCollection dc = getClusterState().getCollectionOrNull(cd.getCollectionName());
if (dc == null) return;
@ -666,13 +673,12 @@ public class ZkController implements Closeable {
props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().name().toUpperCase(Locale.ROOT));
props.put(CoreAdminParams.NODE, getNodeName());
getOverseerCollectionQueue().offer(Utils.toJSON(new ZkNodeProps(props)));
} catch (KeeperException e) {
log.info("Met exception on give up leadership for {}", key, e);
replicasMetTragicEvent.remove(key);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.info("Met exception on give up leadership for {}", key, e);
} catch (Exception e) {
// Exceptions are not bubbled up. giveupLeadership is best effort, and is only called after some other
// unrecoverable error has happened
log.error("Met exception on give up leadership for {}", key, e);
replicasMetTragicEvent.remove(key);
SolrZkClient.checkInterrupted(e);
}
}
}

View File

@ -1867,8 +1867,9 @@ public class CoreContainer {
}
/**
* @param solrCore te core against which we check if there has been a tragic exception
* @return whether this solr core has tragic exception
* @param solrCore the core against which we check if there has been a tragic exception
* @return whether this Solr core has a tragic exception
* @see org.apache.lucene.index.IndexWriter#getTragicException()
*/
public boolean checkTragicException(SolrCore solrCore) {
Throwable tragicException;
@ -1879,10 +1880,8 @@ public class CoreContainer {
tragicException = e;
}
if (tragicException != null) {
if (isZooKeeperAware()) {
getZkController().giveupLeadership(solrCore.getCoreDescriptor(), tragicException);
}
if (tragicException != null && isZooKeeperAware()) {
getZkController().giveupLeadership(solrCore.getCoreDescriptor(), tragicException);
}
return tragicException != null;