Incorrect locking in FsVolumeList#checkDirs can hang datanodes (Noah Lorang via Colin P. McCabe)
This commit is contained in:
parent
be86237c09
commit
d8352b9b2b
|
@ -574,6 +574,9 @@ Release 2.6.1 - UNRELEASED
|
||||||
HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in
|
HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in
|
||||||
checkLeases (Ravi Prakash via Colin P. McCabe)
|
checkLeases (Ravi Prakash via Colin P. McCabe)
|
||||||
|
|
||||||
|
HDFS-7489. Incorrect locking in FsVolumeList#checkDirs can hang datanodes
|
||||||
|
(Noah Lorang via Colin P. McCabe)
|
||||||
|
|
||||||
Release 2.6.0 - 2014-11-18
|
Release 2.6.0 - 2014-11-18
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -36,6 +36,7 @@ class FsVolumeList {
|
||||||
* This list is replaced on modification holding "this" lock.
|
* This list is replaced on modification holding "this" lock.
|
||||||
*/
|
*/
|
||||||
volatile List<FsVolumeImpl> volumes = null;
|
volatile List<FsVolumeImpl> volumes = null;
|
||||||
|
private Object checkDirsMutex = new Object();
|
||||||
|
|
||||||
private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
|
private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
|
||||||
private volatile int numFailedVolumes;
|
private volatile int numFailedVolumes;
|
||||||
|
@ -167,40 +168,39 @@ class FsVolumeList {
|
||||||
* Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any
|
* Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any
|
||||||
* volumes from the active list that result in a DiskErrorException.
|
* volumes from the active list that result in a DiskErrorException.
|
||||||
*
|
*
|
||||||
* This method is synchronized to allow only one instance of checkDirs()
|
* Use checkDirsMutext to allow only one instance of checkDirs() call
|
||||||
* call
|
*
|
||||||
* @return list of all the removed volumes.
|
* @return list of all the removed volumes.
|
||||||
*/
|
*/
|
||||||
synchronized List<FsVolumeImpl> checkDirs() {
|
List<FsVolumeImpl> checkDirs() {
|
||||||
ArrayList<FsVolumeImpl> removedVols = null;
|
synchronized(checkDirsMutex) {
|
||||||
|
ArrayList<FsVolumeImpl> removedVols = null;
|
||||||
// Make a copy of volumes for performing modification
|
|
||||||
final List<FsVolumeImpl> volumeList = new ArrayList<FsVolumeImpl>(volumes);
|
// Make a copy of volumes for performing modification
|
||||||
|
final List<FsVolumeImpl> volumeList = new ArrayList<FsVolumeImpl>(volumes);
|
||||||
|
|
||||||
for(Iterator<FsVolumeImpl> i = volumeList.iterator(); i.hasNext(); ) {
|
for(Iterator<FsVolumeImpl> i = volumeList.iterator(); i.hasNext(); ) {
|
||||||
final FsVolumeImpl fsv = i.next();
|
final FsVolumeImpl fsv = i.next();
|
||||||
try {
|
try {
|
||||||
fsv.checkDirs();
|
fsv.checkDirs();
|
||||||
} catch (DiskErrorException e) {
|
} catch (DiskErrorException e) {
|
||||||
FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e);
|
FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e);
|
||||||
if (removedVols == null) {
|
if (removedVols == null) {
|
||||||
removedVols = new ArrayList<FsVolumeImpl>(1);
|
removedVols = new ArrayList<FsVolumeImpl>(1);
|
||||||
|
}
|
||||||
|
removedVols.add(fsv);
|
||||||
|
removeVolume(fsv.getBasePath());
|
||||||
|
numFailedVolumes++;
|
||||||
}
|
}
|
||||||
removedVols.add(fsv);
|
|
||||||
fsv.shutdown();
|
|
||||||
i.remove(); // Remove the volume
|
|
||||||
numFailedVolumes++;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
if (removedVols != null && removedVols.size() > 0) {
|
||||||
if (removedVols != null && removedVols.size() > 0) {
|
FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
|
||||||
// Replace volume list
|
+ " volumes. Current volumes: " + this);
|
||||||
volumes = Collections.unmodifiableList(volumeList);
|
}
|
||||||
FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
|
|
||||||
+ " volumes. Current volumes: " + this);
|
|
||||||
}
|
|
||||||
|
|
||||||
return removedVols;
|
return removedVols;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue