HDFS-13112. Token expiration edits may cause log corruption or deadlock. Contributed by Daryn Sharp.

(cherry picked from commit 47473952e5)
This commit is contained in:
Kihwal Lee 2018-02-15 15:35:27 -06:00
parent b9ce463fbd
commit 7b83fca453
5 changed files with 83 additions and 23 deletions

View File

@ -21,7 +21,6 @@ package org.apache.hadoop.hdfs.security.token.delegation;
import java.io.DataInput; import java.io.DataInput;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
@ -366,35 +365,59 @@ public class DelegationTokenSecretManager
@Override //AbstractDelegationTokenManager @Override //AbstractDelegationTokenManager
protected void logUpdateMasterKey(DelegationKey key) protected void logUpdateMasterKey(DelegationKey key)
throws IOException { throws IOException {
synchronized (noInterruptsLock) { try {
// The edit logging code will fail catastrophically if it // The edit logging code will fail catastrophically if it
// is interrupted during a logSync, since the interrupt // is interrupted during a logSync, since the interrupt
// closes the edit log files. Doing this inside the // closes the edit log files. Doing this inside the
// above lock and then checking interruption status // fsn lock will prevent being interrupted when stopping
// prevents this bug. // the secret manager.
if (Thread.interrupted()) { namesystem.readLockInterruptibly();
throw new InterruptedIOException( try {
"Interrupted before updating master key"); // this monitor isn't necessary if stopped while holding write lock
// but for safety, guard against a stop with read lock.
synchronized (noInterruptsLock) {
if (Thread.currentThread().isInterrupted()) {
return; // leave flag set so secret monitor exits.
} }
namesystem.logUpdateMasterKey(key); namesystem.logUpdateMasterKey(key);
} }
} finally {
namesystem.readUnlock();
}
} catch (InterruptedException ie) {
// AbstractDelegationTokenManager may crash if an exception is thrown.
// The interrupt flag will be detected when it attempts to sleep.
Thread.currentThread().interrupt();
}
} }
@Override //AbstractDelegationTokenManager @Override //AbstractDelegationTokenManager
protected void logExpireToken(final DelegationTokenIdentifier dtId) protected void logExpireToken(final DelegationTokenIdentifier dtId)
throws IOException { throws IOException {
synchronized (noInterruptsLock) { try {
// The edit logging code will fail catastrophically if it // The edit logging code will fail catastrophically if it
// is interrupted during a logSync, since the interrupt // is interrupted during a logSync, since the interrupt
// closes the edit log files. Doing this inside the // closes the edit log files. Doing this inside the
// above lock and then checking interruption status // fsn lock will prevent being interrupted when stopping
// prevents this bug. // the secret manager.
if (Thread.interrupted()) { namesystem.readLockInterruptibly();
throw new InterruptedIOException( try {
"Interrupted before expiring delegation token"); // this monitor isn't necessary if stopped while holding write lock
// but for safety, guard against a stop with read lock.
synchronized (noInterruptsLock) {
if (Thread.currentThread().isInterrupted()) {
return; // leave flag set so secret monitor exits.
} }
namesystem.logExpireDelegationToken(dtId); namesystem.logExpireDelegationToken(dtId);
} }
} finally {
namesystem.readUnlock();
}
} catch (InterruptedException ie) {
// AbstractDelegationTokenManager may crash if an exception is thrown.
// The interrupt flag will be detected when it attempts to sleep.
Thread.currentThread().interrupt();
}
} }
/** A utility method for creating credentials. */ /** A utility method for creating credentials. */

View File

@ -1568,6 +1568,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
this.fsLock.readLock(); this.fsLock.readLock();
} }
@Override @Override
public void readLockInterruptibly() throws InterruptedException {
this.fsLock.readLockInterruptibly();
}
@Override
public void readUnlock() { public void readUnlock() {
this.fsLock.readUnlock(); this.fsLock.readUnlock();
} }
@ -5618,9 +5622,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
assert !isInSafeMode() : assert !isInSafeMode() :
"this should never be called while in safemode, since we stop " + "this should never be called while in safemode, since we stop " +
"the DT manager before entering safemode!"; "the DT manager before entering safemode!";
// No need to hold FSN lock since we don't access any internal // edit log rolling is not thread-safe and must be protected by the
// structures, and this is stopped before the FSN shuts itself // fsn lock. not updating namespace so read lock is sufficient.
// down, etc. assert hasReadLock();
getEditLog().logUpdateMasterKey(key); getEditLog().logUpdateMasterKey(key);
getEditLog().logSync(); getEditLog().logSync();
} }
@ -5634,9 +5638,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
assert !isInSafeMode() : assert !isInSafeMode() :
"this should never be called while in safemode, since we stop " + "this should never be called while in safemode, since we stop " +
"the DT manager before entering safemode!"; "the DT manager before entering safemode!";
// No need to hold FSN lock since we don't access any internal // edit log rolling is not thread-safe and must be protected by the
// structures, and this is stopped before the FSN shuts itself // fsn lock. not updating namespace so read lock is sufficient.
// down, etc. assert hasReadLock();
// do not logSync so expiration edits are batched
getEditLog().logCancelDelegationToken(id); getEditLog().logCancelDelegationToken(id);
} }

View File

@ -145,6 +145,13 @@ class FSNamesystemLock {
} }
} }
public void readLockInterruptibly() throws InterruptedException {
coarseLock.readLock().lockInterruptibly();
if (coarseLock.getReadHoldCount() == 1) {
readLockHeldTimeStampNanos.set(timer.monotonicNowNanos());
}
}
public void readUnlock() { public void readUnlock() {
readUnlock(OP_NAME_OTHER); readUnlock(OP_NAME_OTHER);
} }

View File

@ -22,6 +22,9 @@ public interface RwLock {
/** Acquire read lock. */ /** Acquire read lock. */
public void readLock(); public void readLock();
/** Acquire read lock, unless interrupted while waiting */
void readLockInterruptibly() throws InterruptedException;
/** Release read lock. */ /** Release read lock. */
public void readUnlock(); public void readUnlock();

View File

@ -24,6 +24,7 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.util.Iterator; import java.util.Iterator;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -37,7 +38,11 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.Token;
import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import static org.mockito.Mockito.*; import static org.mockito.Mockito.*;
/** /**
@ -181,7 +186,24 @@ public class TestSecurityTokenEditLog {
FSImage fsImage = mock(FSImage.class); FSImage fsImage = mock(FSImage.class);
FSEditLog log = mock(FSEditLog.class); FSEditLog log = mock(FSEditLog.class);
doReturn(log).when(fsImage).getEditLog(); doReturn(log).when(fsImage).getEditLog();
// verify that the namesystem read lock is held while logging token
// expirations. the namesystem is not updated, so write lock is not
// necessary, but the lock is required because edit log rolling is not
// thread-safe.
final AtomicReference<FSNamesystem> fsnRef = new AtomicReference<>();
doAnswer(
new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
// fsn claims read lock if either read or write locked.
Assert.assertTrue(fsnRef.get().hasReadLock());
Assert.assertFalse(fsnRef.get().hasWriteLock());
return null;
}
}
).when(log).logCancelDelegationToken(any(DelegationTokenIdentifier.class));
FSNamesystem fsn = new FSNamesystem(conf, fsImage); FSNamesystem fsn = new FSNamesystem(conf, fsImage);
fsnRef.set(fsn);
DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager(); DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager();
try { try {