Incorrect locking in FsVolumeList#checkDirs can hang datanodes (Noah Lorang via Colin P. McCabe)

This commit is contained in:
Colin Patrick Mccabe 2014-12-09 10:55:17 -08:00
parent be86237c09
commit d8352b9b2b
2 changed files with 31 additions and 28 deletions

View File

@ -574,6 +574,9 @@ Release 2.6.1 - UNRELEASED
HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in
checkLeases (Ravi Prakash via Colin P. McCabe) checkLeases (Ravi Prakash via Colin P. McCabe)
HDFS-7489. Incorrect locking in FsVolumeList#checkDirs can hang datanodes
(Noah Lorang via Colin P. McCabe)
Release 2.6.0 - 2014-11-18 Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -36,6 +36,7 @@ class FsVolumeList {
* This list is replaced on modification holding "this" lock. * This list is replaced on modification holding "this" lock.
*/ */
volatile List<FsVolumeImpl> volumes = null; volatile List<FsVolumeImpl> volumes = null;
/**
 * Monitor used by checkDirs() so that only one checkDirs() call runs at a
 * time. Declared final so the lock reference can never be reassigned, which
 * would let two threads synchronize on different objects.
 */
private final Object checkDirsMutex = new Object();
private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser; private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
private volatile int numFailedVolumes; private volatile int numFailedVolumes;
@ -167,11 +168,12 @@ public void run() {
* Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any * Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any
* volumes from the active list that result in a DiskErrorException. * volumes from the active list that result in a DiskErrorException.
* *
* This method is synchronized to allow only one instance of checkDirs() * Use checkDirsMutex to allow only one instance of checkDirs() call
* call *
* @return list of all the removed volumes. * @return list of all the removed volumes.
*/ */
synchronized List<FsVolumeImpl> checkDirs() { List<FsVolumeImpl> checkDirs() {
synchronized(checkDirsMutex) {
ArrayList<FsVolumeImpl> removedVols = null; ArrayList<FsVolumeImpl> removedVols = null;
// Make a copy of volumes for performing modification // Make a copy of volumes for performing modification
@ -187,21 +189,19 @@ synchronized List<FsVolumeImpl> checkDirs() {
removedVols = new ArrayList<FsVolumeImpl>(1); removedVols = new ArrayList<FsVolumeImpl>(1);
} }
removedVols.add(fsv); removedVols.add(fsv);
fsv.shutdown(); removeVolume(fsv.getBasePath());
i.remove(); // Remove the volume
numFailedVolumes++; numFailedVolumes++;
} }
} }
if (removedVols != null && removedVols.size() > 0) { if (removedVols != null && removedVols.size() > 0) {
// Replace volume list
volumes = Collections.unmodifiableList(volumeList);
FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size() FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
+ " volumes. Current volumes: " + this); + " volumes. Current volumes: " + this);
} }
return removedVols; return removedVols;
} }
}
@Override @Override
public String toString() { public String toString() {