HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)
(cherry picked from commit 20a076bafc)
Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
parent d329237990
commit ef693b541c
@@ -225,6 +225,8 @@ Release 2.6.0 - UNRELEASED
     HDFS-6727. Refresh data volumes on DataNode based on configuration changes
     (Lei Xu via cmccabe)
 
+    HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)
+
   OPTIMIZATIONS
 
     HDFS-6690. Deduplicate xattr names in memory. (wang)
@@ -41,6 +41,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.crypto.CipherSuite;
 import org.apache.hadoop.fs.CanSetDropBehind;
@@ -76,6 +77,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
 import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
 import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
+import org.apache.hadoop.hdfs.server.namenode.RetryStartFileException;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.io.EnumSetWritable;
 import org.apache.hadoop.io.IOUtils;
@@ -127,6 +129,13 @@ public class DFSOutputStream extends FSOutputSummer
     implements Syncable, CanSetDropBehind {
   private final long dfsclientSlowLogThresholdMs;
   private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB
+  /**
+   * Number of times to retry creating a file when there are transient
+   * errors (typically related to encryption zones and KeyProvider operations).
+   */
+  @VisibleForTesting
+  public static final int CREATE_RETRY_COUNT = 10;
+
   private final DFSClient dfsClient;
   private Socket s;
   // closed is accessed by different threads under different locks.
@@ -1648,23 +1657,46 @@ public class DFSOutputStream extends FSOutputSummer
       short replication, long blockSize, Progressable progress, int buffersize,
       DataChecksum checksum, String[] favoredNodes,
       List<CipherSuite> cipherSuites) throws IOException {
-    final HdfsFileStatus stat;
-    try {
-      stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
-          new EnumSetWritable<CreateFlag>(flag), createParent, replication,
-          blockSize, cipherSuites);
-    } catch(RemoteException re) {
-      throw re.unwrapRemoteException(AccessControlException.class,
-          DSQuotaExceededException.class,
-          FileAlreadyExistsException.class,
-          FileNotFoundException.class,
-          ParentNotDirectoryException.class,
-          NSQuotaExceededException.class,
-          SafeModeException.class,
-          UnresolvedPathException.class,
-          SnapshotAccessControlException.class,
-          UnknownCipherSuiteException.class);
+    HdfsFileStatus stat = null;
+
+    // Retry the create if we get a RetryStartFileException up to a maximum
+    // number of times
+    boolean shouldRetry = true;
+    int retryCount = CREATE_RETRY_COUNT;
+    while (shouldRetry) {
+      shouldRetry = false;
+      try {
+        stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
+            new EnumSetWritable<CreateFlag>(flag), createParent, replication,
+            blockSize, cipherSuites);
+        break;
+      } catch (RemoteException re) {
+        IOException e = re.unwrapRemoteException(
+            AccessControlException.class,
+            DSQuotaExceededException.class,
+            FileAlreadyExistsException.class,
+            FileNotFoundException.class,
+            ParentNotDirectoryException.class,
+            NSQuotaExceededException.class,
+            RetryStartFileException.class,
+            SafeModeException.class,
+            UnresolvedPathException.class,
+            SnapshotAccessControlException.class,
+            UnknownCipherSuiteException.class);
+        if (e instanceof RetryStartFileException) {
+          if (retryCount > 0) {
+            shouldRetry = true;
+            retryCount--;
+          } else {
+            throw new IOException("Too many retries because of encryption" +
+                " zone operations", e);
+          }
+        } else {
+          throw e;
+        }
+      }
     }
+    Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");
     final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat,
         flag, progress, checksum, favoredNodes);
     out.start();
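Aside: stripped of HDFS specifics, the client-side change in the hunk above is a bounded retry loop around the create RPC. The following is a minimal, self-contained sketch of that pattern; CreateRetrySketch, createOnce(), and TransientCreateException are hypothetical stand-ins for illustration only (the real code retries on RetryStartFileException inside DFSOutputStream), not Hadoop APIs.

import java.io.IOException;

// Stand-in for the transient failure the NameNode signals; the real class is
// org.apache.hadoop.hdfs.server.namenode.RetryStartFileException.
class TransientCreateException extends IOException {}

public class CreateRetrySketch {
  // Same bound as DFSOutputStream.CREATE_RETRY_COUNT in the hunk above.
  static final int CREATE_RETRY_COUNT = 10;

  // Stand-in for the namenode.create(...) RPC.
  static String createOnce() throws IOException {
    return "fileStatus";
  }

  static String createWithRetries() throws IOException {
    int retryCount = CREATE_RETRY_COUNT;
    while (true) {
      try {
        return createOnce(); // success: stop retrying
      } catch (TransientCreateException e) {
        if (retryCount-- <= 0) {
          // Out of budget: rethrow with context, as the hunk does.
          throw new IOException(
              "Too many retries because of encryption zone operations", e);
        }
        // Transient EZ/KeyProvider race on the NameNode; try the create again.
      }
    }
  }
}

As in the hunk, the loop permits CREATE_RETRY_COUNT retries after the initial attempt before giving up; any non-transient exception propagates immediately.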
@@ -2448,84 +2448,66 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
     waitForLoadingFSImage();
 
-    /*
-     * We want to avoid holding any locks while doing KeyProvider operations,
-     * since they can be very slow. Since the path can
-     * flip flop between being in an encryption zone and not in the meantime,
-     * we need to recheck the preconditions and redo KeyProvider operations
-     * in some situations.
-     *
-     * A special RetryStartFileException is used to indicate that we should
-     * retry creation of a FileEncryptionInfo.
+    /**
+     * If the file is in an encryption zone, we optimistically create an
+     * EDEK for the file by calling out to the configured KeyProvider.
+     * Since this typically involves doing an RPC, we take the readLock
+     * initially, then drop it to do the RPC.
+     *
+     * Since the path can flip-flop between being in an encryption zone and not
+     * in the meantime, we need to recheck the preconditions when we retake the
+     * lock to do the create. If the preconditions are not met, we throw a
+     * special RetryStartFileException to ask the DFSClient to try the create
+     * again later.
      */
-    BlocksMapUpdateInfo toRemoveBlocks = null;
+    CipherSuite suite = null;
+    String ezKeyName = null;
+    readLock();
     try {
-      boolean shouldContinue = true;
-      int iters = 0;
-      while (shouldContinue) {
-        skipSync = false;
-        if (iters >= 10) {
-          throw new IOException("Too many retries because of encryption zone " +
-              "operations, something might be broken!");
-        }
-        shouldContinue = false;
-        iters++;
-
-        // Optimistically determine CipherSuite and ezKeyName if the path is
-        // currently within an encryption zone
-        CipherSuite suite = null;
-        String ezKeyName = null;
-        readLock();
-        try {
-          src = resolvePath(src, pathComponents);
-          INodesInPath iip = dir.getINodesInPath4Write(src);
-          // Nothing to do if the path is not within an EZ
-          if (dir.isInAnEZ(iip)) {
-            suite = chooseCipherSuite(iip, cipherSuites);
-            if (suite != null) {
-              Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
-                  "Chose an UNKNOWN CipherSuite!");
-            }
-            ezKeyName = dir.getKeyName(iip);
-            Preconditions.checkState(ezKeyName != null);
-          }
-        } finally {
-          readUnlock();
-        }
-
-        Preconditions.checkState(
-            (suite == null && ezKeyName == null) ||
-                (suite != null && ezKeyName != null),
-            "Both suite and ezKeyName should both be null or not null");
-        // Generate EDEK if necessary while not holding the lock
-        EncryptedKeyVersion edek =
-            generateEncryptedDataEncryptionKey(ezKeyName);
-        EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
-        // Try to create the file with the computed cipher suite and EDEK
-        writeLock();
-        try {
-          checkOperation(OperationCategory.WRITE);
-          checkNameNodeSafeMode("Cannot create file" + src);
-          src = resolvePath(src, pathComponents);
-          toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
-              clientMachine, create, overwrite, createParent, replication,
-              blockSize, suite, edek, logRetryCache);
-          stat = dir.getFileInfo(src, false,
-              FSDirectory.isReservedRawName(srcArg));
-        } catch (StandbyException se) {
-          skipSync = true;
-          throw se;
-        } catch (RetryStartFileException e) {
-          shouldContinue = true;
-          if (LOG.isTraceEnabled()) {
-            LOG.trace("Preconditions failed, retrying creation of " +
-                "FileEncryptionInfo", e);
-          }
-        } finally {
-          writeUnlock();
+      src = resolvePath(src, pathComponents);
+      INodesInPath iip = dir.getINodesInPath4Write(src);
+      // Nothing to do if the path is not within an EZ
+      if (dir.isInAnEZ(iip)) {
+        suite = chooseCipherSuite(iip, cipherSuites);
+        if (suite != null) {
+          Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
+              "Chose an UNKNOWN CipherSuite!");
         }
+        ezKeyName = dir.getKeyName(iip);
+        Preconditions.checkState(ezKeyName != null);
       }
     } finally {
+      readUnlock();
+    }
+
+    Preconditions.checkState(
+        (suite == null && ezKeyName == null) ||
+            (suite != null && ezKeyName != null),
+        "Both suite and ezKeyName should both be null or not null");
+
+    // Generate EDEK if necessary while not holding the lock
+    EncryptedKeyVersion edek =
+        generateEncryptedDataEncryptionKey(ezKeyName);
+    EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
+
+    // Proceed with the create, using the computed cipher suite and
+    // generated EDEK
+    BlocksMapUpdateInfo toRemoveBlocks = null;
+    writeLock();
+    try {
+      checkOperation(OperationCategory.WRITE);
+      checkNameNodeSafeMode("Cannot create file" + src);
+      src = resolvePath(src, pathComponents);
+      toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
+          clientMachine, create, overwrite, createParent, replication,
+          blockSize, suite, edek, logRetryCache);
+      stat = dir.getFileInfo(src, false,
+          FSDirectory.isReservedRawName(srcArg));
+    } catch (StandbyException se) {
+      skipSync = true;
+      throw se;
+    } finally {
+      writeUnlock();
       // There might be transactions logged while trying to recover the lease.
       // They need to be sync'ed even when an exception was thrown.
       if (!skipSync) {
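Aside: the rewritten javadoc in the hunk above describes the new NameNode-side locking discipline: look up encryption-zone state under the read lock, drop all locks for the slow KeyProvider RPC, then retake the write lock and recheck. The sketch below shows just that shape under stated assumptions; the helper names (lookupEzKeyName, generateEdek, preconditionsStillHold, createInternal) are hypothetical stand-ins for the real FSNamesystem/FSDirectory calls.

import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class StartFileLockingSketch {
  private final ReadWriteLock lock = new ReentrantReadWriteLock();

  Object startFile(String src) throws Exception {
    String ezKeyName;
    // Phase 1: cheap, in-memory lookups under the read lock.
    lock.readLock().lock();
    try {
      ezKeyName = lookupEzKeyName(src);
    } finally {
      lock.readLock().unlock();
    }

    // Phase 2: the potentially slow KeyProvider call runs with no lock held.
    Object edek = (ezKeyName == null) ? null : generateEdek(ezKeyName);

    // Phase 3: retake the write lock and recheck; if the encryption zone
    // changed while no lock was held, signal the client to retry.
    lock.writeLock().lock();
    try {
      if (!preconditionsStillHold(src, ezKeyName)) {
        throw new Exception("retry create later"); // RetryStartFileException in HDFS
      }
      return createInternal(src, edek);
    } finally {
      lock.writeLock().unlock();
    }
  }

  // Hypothetical helpers standing in for FSDirectory/KeyProvider calls.
  private String lookupEzKeyName(String src) { return null; }
  private Object generateEdek(String keyName) { return new Object(); }
  private boolean preconditionsStillHold(String src, String key) { return true; }
  private Object createInternal(String src, Object edek) { return new Object(); }
}

The recheck matters because the path may leave or change its encryption zone while unlocked; instead of looping inside the NameNode as the removed code did, the precondition failure is now surfaced to the DFSClient, which owns the retry loop.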
@@ -17,5 +17,20 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
-public class RetryStartFileException extends Exception {
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+@InterfaceAudience.Private
+public class RetryStartFileException extends IOException {
+  private static final long serialVersionUID = 1L;
+
+  public RetryStartFileException() {
+    super("Preconditions for creating a file failed because of a " +
+        "transient error, retry create later.");
+  }
+
+  public RetryStartFileException(String s) {
+    super(s);
+  }
 }
@@ -940,7 +940,7 @@ public class TestEncryptionZones {
     Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file));
 
     // Flip-flop between two EZs to repeatedly fail
-    for (int i=0; i<10; i++) {
+    for (int i=0; i<DFSOutputStream.CREATE_RETRY_COUNT+1; i++) {
       injector.ready.await();
       fsWrapper.delete(zone1, true);
       fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true);