HDDS-1464. Client should have different retry policies for different exceptions. (#785)

Siddharth 2019-05-05 09:21:15 -07:00 committed by Hanisha Koneru
parent 69b903bbd8
commit 1d70c8ca0f
2 changed files with 52 additions and 19 deletions

OzoneClientUtils.java

@@ -17,22 +17,29 @@
  */
 package org.apache.hadoop.ozone.client;
 
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import org.apache.hadoop.hdds.client.OzoneQuota;
 import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.hadoop.ozone.client.rest.response.*;
+import org.apache.hadoop.ozone.client.rest.response.BucketInfo;
+import org.apache.hadoop.ozone.client.rest.response.KeyInfo;
+import org.apache.hadoop.ozone.client.rest.response.KeyInfoDetails;
+import org.apache.hadoop.ozone.client.rest.response.KeyLocation;
+import org.apache.hadoop.ozone.client.rest.response.VolumeInfo;
+import org.apache.hadoop.ozone.client.rest.response.VolumeOwner;
 import org.apache.ratis.protocol.AlreadyClosedException;
 import org.apache.ratis.protocol.GroupMismatchException;
 import org.apache.ratis.protocol.RaftRetryFailureException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 
 /** A utility class for OzoneClient. */
 public final class OzoneClientUtils {
@@ -129,14 +136,31 @@ public static KeyInfoDetails asKeyInfoDetails(OzoneKeyDetails key) {
 
   public static RetryPolicy createRetryPolicy(int maxRetryCount,
       long retryInterval) {
-    // just retry without sleep
-    RetryPolicy retryPolicy = RetryPolicies
-        .retryUpToMaximumCountWithFixedSleep(maxRetryCount, retryInterval,
-            TimeUnit.MILLISECONDS);
-    return retryPolicy;
+    // retry with fixed sleep between retries
+    return RetryPolicies.retryUpToMaximumCountWithFixedSleep(
+        maxRetryCount, retryInterval, TimeUnit.MILLISECONDS);
   }
 
   public static List<Class<? extends Exception>> getExceptionList() {
     return EXCEPTION_LIST;
   }
+
+  public static Map<Class<? extends Throwable>, RetryPolicy>
+      getRetryPolicyByException(int maxRetryCount, long retryInterval) {
+    Map<Class<? extends Throwable>, RetryPolicy> policyMap = new HashMap<>();
+    for (Class<? extends Exception> ex : EXCEPTION_LIST) {
+      if (ex == TimeoutException.class ||
+          ex == RaftRetryFailureException.class) {
+        // retry without sleep
+        policyMap.put(ex, createRetryPolicy(maxRetryCount, 0));
+      } else {
+        // retry with fixed sleep between retries
+        policyMap.put(ex, createRetryPolicy(maxRetryCount, retryInterval));
+      }
+    }
+    // Default retry policy
+    policyMap.put(Exception.class, createRetryPolicy(
+        maxRetryCount, retryInterval));
+    return policyMap;
+  }
 }
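To see the effect of the new helper from a caller's perspective, here is a minimal, hypothetical sketch (the class name, parameter values, and printed output are illustrative, not part of the patch; it assumes the patched OzoneClientUtils and hadoop-common on the classpath). TimeoutException and RaftRetryFailureException are mapped to a zero-sleep policy, while every other entry, including the Exception.class default, backs off for the configured interval.

import java.util.Map;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ozone.client.OzoneClientUtils;

public final class RetryPolicyMapDemo {
  public static void main(String[] args) throws Exception {
    // 5 attempts, 2000 ms sleep for exceptions that should back off.
    Map<Class<? extends Throwable>, RetryPolicy> policies =
        OzoneClientUtils.getRetryPolicyByException(5, 2000);

    // TimeoutException (like RaftRetryFailureException) gets the zero-sleep policy.
    RetryPolicy.RetryAction fast = policies.get(TimeoutException.class)
        .shouldRetry(new TimeoutException("timed out"), 0, 0, true);

    // Anything without a dedicated entry falls under the Exception.class default.
    RetryPolicy.RetryAction slow = policies.get(Exception.class)
        .shouldRetry(new Exception("generic failure"), 0, 0, true);

    System.out.println("timeout retry delay: " + fast.delayMillis + " ms");
    System.out.println("default retry delay: " + slow.delayMillis + " ms");
  }
}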

KeyOutputStream.java

@@ -22,8 +22,7 @@
 import org.apache.hadoop.fs.FSExceptionMessages;
 import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
-    .ChecksumType;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
 import org.apache.hadoop.hdds.scm.storage.BufferPool;
@@ -52,7 +51,10 @@
 import java.util.List;
 import java.util.Collection;
 import java.util.ListIterator;
+import java.util.Map;
 import java.util.concurrent.TimeoutException;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 
 /**
  * Maintaining a list of BlockInputStream. Write based on offset.
@@ -95,7 +97,7 @@ enum StreamAction {
   private OmMultipartCommitUploadPartInfo commitUploadPartInfo;
   private FileEncryptionInfo feInfo;
   private ExcludeList excludeList;
-  private final RetryPolicy retryPolicy;
+  private final Map<Class<? extends Throwable>, RetryPolicy> retryPolicyMap;
   private int retryCount;
   private long offset;
   /**
@@ -121,7 +123,10 @@ public KeyOutputStream() {
         OzoneConfigKeys.OZONE_CLIENT_CHECKSUM_TYPE_DEFAULT);
     this.bytesPerChecksum = OzoneConfigKeys
         .OZONE_CLIENT_BYTES_PER_CHECKSUM_DEFAULT_BYTES; // Default is 1MB
-    this.retryPolicy = RetryPolicies.TRY_ONCE_THEN_FAIL;
+    this.retryPolicyMap = OzoneClientUtils.getExceptionList()
+        .stream()
+        .collect(Collectors.toMap(Function.identity(),
+            e -> RetryPolicies.TRY_ONCE_THEN_FAIL));
     retryCount = 0;
     offset = 0;
   }
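The no-argument constructor seeds the same map shape with Java streams, pointing every known exception class at the fail-fast policy. A standalone sketch of that Collectors.toMap idiom follows (the class name and the two-element example list are illustrative; the real list is the EXCEPTION_LIST defined in OzoneClientUtils):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeoutException;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;

final class TryOnceMapSketch {
  private TryOnceMapSketch() { }

  // Every listed exception class becomes a key; each key maps to the same
  // fail-fast policy, so any tracked failure aborts after a single attempt.
  static Map<Class<? extends Exception>, RetryPolicy> buildFailFastMap() {
    List<Class<? extends Exception>> exceptions =
        Arrays.asList(TimeoutException.class, IOException.class);
    return exceptions.stream()
        .collect(Collectors.toMap(Function.identity(),
            e -> RetryPolicies.TRY_ONCE_THEN_FAIL));
  }
}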
@@ -200,8 +205,8 @@ public KeyOutputStream(OpenKeySession handler,
     this.bufferPool =
         new BufferPool(chunkSize, (int)streamBufferMaxSize / chunkSize);
     this.excludeList = new ExcludeList();
-    this.retryPolicy = OzoneClientUtils.createRetryPolicy(maxRetryCount,
-        retryInterval);
+    this.retryPolicyMap = OzoneClientUtils.getRetryPolicyByException(
+        maxRetryCount, retryInterval);
     this.retryCount = 0;
   }
@@ -502,10 +507,14 @@ private void markStreamClosed() {
   }
 
   private void handleRetry(IOException exception, long len) throws IOException {
+    RetryPolicy retryPolicy =
+        retryPolicyMap.get(checkForException(exception).getClass());
+    if (retryPolicy == null) {
+      retryPolicy = retryPolicyMap.get(Exception.class);
+    }
     RetryPolicy.RetryAction action;
     try {
-      action = retryPolicy
-          .shouldRetry(exception, retryCount, 0, true);
+      action = retryPolicy.shouldRetry(exception, retryCount, 0, true);
     } catch (Exception e) {
       throw e instanceof IOException ? (IOException) e : new IOException(e);
     }
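handleRetry now resolves a policy by the class of the unwrapped failure and falls back to the Exception.class default when no dedicated entry exists. Below is a minimal, self-contained sketch of that lookup pattern; checkForException is an existing KeyOutputStream helper that is not part of this diff, so the unwrapping step here is only a stand-in.

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.io.retry.RetryPolicy;

final class RetryPolicyLookup {
  private RetryPolicyLookup() { }

  // Resolve the policy for a failure: exact class first, then the
  // default registered under Exception.class.
  static RetryPolicy policyFor(
      Map<Class<? extends Throwable>, RetryPolicy> policyMap,
      IOException exception) {
    // Stand-in for KeyOutputStream#checkForException: prefer the wrapped cause.
    Throwable t = exception.getCause() != null ? exception.getCause() : exception;
    RetryPolicy policy = policyMap.get(t.getClass());
    return policy != null ? policy : policyMap.get(Exception.class);
  }
}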