YARN-9914. Use separate configs for free disk space checking for full and not-full disks. Contributed by Jim Brennan

This commit is contained in:
Eric Badger 2019-10-25 17:13:04 +00:00
parent 862526530a
commit eef34f2d87
5 changed files with 217 additions and 40 deletions

View File

@ -1942,6 +1942,17 @@ public static boolean isAclEnabled(Configuration conf) {
*/
public static final String NM_MIN_PER_DISK_FREE_SPACE_MB =
NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb";
/**
* The minimum space that must be available on an offline
* disk for it to be marked as online. The value should not be less
* than NM_MIN_PER_DISK_FREE_SPACE_MB. If its value is less than
* NM_MIN_PER_DISK_FREE_SPACE_MB or is not set, it will be set to the
* same value as NM_MIN_PER_DISK_FREE_SPACE_MB.
* This applies to nm-local-dirs and nm-log-dirs.
*/
public static final String NM_WM_HIGH_PER_DISK_FREE_SPACE_MB =
NM_DISK_HEALTH_CHECK_PREFIX +
"min-free-space-per-disk-watermark-high-mb";
/**
* By default, all of the disk can be used before it is marked as offline.
*/

View File

@ -1657,13 +1657,26 @@
</property>
<property>
<description>The minimum space that must be available on a disk for
it to be used. This applies to yarn.nodemanager.local-dirs and
<description>The minimum space in megabytes that must be available on a disk for
it to be used. If space on a disk falls below this threshold, it will be marked
as bad. This applies to yarn.nodemanager.local-dirs and
yarn.nodemanager.log-dirs.</description>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
<value>0</value>
</property>
<property>
<description>The minimum space in megabytes that must be available on a bad
disk for it to be marked as good. This value should not be less
than yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb.
If it is less than yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb,
or it is not set, it will be set to the
same value as yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb.
This applies to yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs.</description>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-watermark-high-mb</name>
<value>0</value>
</property>
<property>
<description>The path to the Linux container executor.</description>
<name>yarn.nodemanager.linux-container-executor.path</name>

View File

@ -110,7 +110,8 @@ static List<String> concat(List<String> l1, List<String> l2) {
private float diskUtilizationPercentageCutoffHigh;
private float diskUtilizationPercentageCutoffLow;
private long diskUtilizationSpaceCutoff;
private long diskFreeSpaceCutoffLow;
private long diskFreeSpaceCutoffHigh;
private int goodDirsDiskUtilizationPercentage;
@ -123,7 +124,7 @@ static List<String> concat(List<String> l1, List<String> l2) {
* directories to be monitored
*/
public DirectoryCollection(String[] dirs) {
this(dirs, 100.0F, 100.0F, 0);
this(dirs, 100.0F, 100.0F, 0, 0);
}
/**
@ -139,7 +140,7 @@ public DirectoryCollection(String[] dirs) {
*
*/
public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0, 0);
}
/**
@ -154,7 +155,26 @@ public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
*
*/
public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff, utilizationSpaceCutOff);
}
/**
* Create collection for the directories specified. Users must specify the
* minimum amount of free space that must be available for the dir to be used.
* Both disk utilization percentage cutoffs are passed on as 100.0F.
*
* @param dirs
* directories to be monitored
* @param utilizationSpaceCutOffLow
* minimum space, in MB, that must be available on the disk for a
* dir to stay in the good dirs list; a good dir with less free
* space than this is reported as full
* @param utilizationSpaceCutOffHigh
* minimum space, in MB, that must be available on the disk for the
* dir to be moved from the bad dirs list to the good dirs list
*/
public DirectoryCollection(String[] dirs, long utilizationSpaceCutOffLow,
long utilizationSpaceCutOffHigh) {
this(dirs, 100.0F, 100.0F, utilizationSpaceCutOffLow,
utilizationSpaceCutOffHigh);
}
/**
@ -174,12 +194,41 @@ public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
* @param utilizationSpaceCutOff
* minimum space, in MB, that must be available on the disk for the
* dir to be marked as good
*/
public DirectoryCollection(String[] dirs,
float utilizationPercentageCutOffHigh,
float utilizationPercentageCutOffLow, long utilizationSpaceCutOff) {
// The single free-space cutoff is passed as both the low and the high
// cutoff of the five-argument constructor.
this(dirs, utilizationPercentageCutOffHigh,
utilizationPercentageCutOffLow, utilizationSpaceCutOff,
utilizationSpaceCutOff);
}
/**
* Create collection for the directories specified. Users must specify the
* maximum percentage of disk utilization allowed and the minimum amount of
* free space that must be available for the dir to be used. If either check
* fails the dir is removed from the good dirs list.
*
* @param dirs
* directories to be monitored
* @param utilizationPercentageCutOffHigh
* percentage of disk that can be used before the dir is taken out
* of the good dirs list
* @param utilizationPercentageCutOffLow
* percentage of disk that can be used when the dir is moved from
* the bad dirs list to the good dirs list
* @param utilizationSpaceCutOffLow
* minimum space, in MB, that must be available on the disk for a
* dir to stay in the good dirs list
* @param utilizationSpaceCutOffHigh
* minimum space, in MB, that must be available on the disk for the
* dir to be moved from the bad dirs list to the good dirs list
*/
public DirectoryCollection(String[] dirs,
float utilizationPercentageCutOffHigh,
float utilizationPercentageCutOffLow,
long utilizationSpaceCutOff) {
long utilizationSpaceCutOffLow,
long utilizationSpaceCutOffHigh) {
conf = new YarnConfiguration();
try {
String diskValidatorName = conf.get(YarnConfiguration.DISK_VALIDATOR,
@ -199,12 +248,10 @@ public DirectoryCollection(String[] dirs,
this.readLock = lock.readLock();
this.writeLock = lock.writeLock();
diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
utilizationPercentageCutOffHigh));
diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
diskUtilizationSpaceCutoff =
utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
setDiskUtilizationPercentageCutoff(utilizationPercentageCutOffHigh,
utilizationPercentageCutOffLow);
setDiskUtilizationSpaceCutoff(utilizationSpaceCutOffLow,
utilizationSpaceCutOffHigh);
dirsChangeListeners = Collections.newSetFromMap(
new ConcurrentHashMap<DirsChangeListener, Boolean>());
@ -471,6 +518,8 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
diskValidator.checkStatus(testDir);
float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
long diskFreeSpaceCutoff = goodDirs.contains(dir) ?
diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
if (isDiskUsageOverPercentageLimit(testDir,
diskUtilizationPercentageCutoff)) {
msg =
@ -480,9 +529,9 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
ret.put(dir,
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
continue;
} else if (isDiskFreeSpaceUnderLimit(testDir)) {
} else if (isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
msg =
"free space below limit of " + diskUtilizationSpaceCutoff
"free space below limit of " + diskFreeSpaceCutoff
+ "MB";
ret.put(dir,
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
@ -505,9 +554,10 @@ private boolean isDiskUsageOverPercentageLimit(File dir,
|| usedPercentage >= 100.0F);
}
private boolean isDiskFreeSpaceUnderLimit(File dir) {
private boolean isDiskFreeSpaceUnderLimit(File dir,
long freeSpaceCutoff) {
long freeSpace = dir.getUsableSpace() / (1024 * 1024);
return freeSpace < this.diskUtilizationSpaceCutoff;
return freeSpace < freeSpaceCutoff;
}
private void createDir(FileContext localFs, Path dir, FsPermission perm)
@ -550,13 +600,29 @@ public void setDiskUtilizationPercentageCutoff(
}
public long getDiskUtilizationSpaceCutoff() {
return diskUtilizationSpaceCutoff;
return getDiskUtilizationSpaceCutoffLow();
}
public void setDiskUtilizationSpaceCutoff(long diskUtilizationSpaceCutoff) {
diskUtilizationSpaceCutoff =
diskUtilizationSpaceCutoff < 0 ? 0 : diskUtilizationSpaceCutoff;
this.diskUtilizationSpaceCutoff = diskUtilizationSpaceCutoff;
/**
* Returns the low free-space cutoff, in MB. This is the cutoff applied to
* dirs that are currently in the good dirs list.
*
* @return the low free-space cutoff in MB
*/
@VisibleForTesting
long getDiskUtilizationSpaceCutoffLow() {
return diskFreeSpaceCutoffLow;
}
/**
* Returns the high free-space cutoff, in MB. This is the cutoff applied to
* dirs that are not currently in the good dirs list.
*
* @return the high free-space cutoff in MB
*/
@VisibleForTesting
long getDiskUtilizationSpaceCutoffHigh() {
return diskFreeSpaceCutoffHigh;
}
/**
* Sets a single free-space cutoff, in MB, that is used as both the low and
* the high cutoff.
*
* @param freeSpaceCutoff
* cutoff in MB, passed on as both arguments of the two-argument
* setter
*/
public void setDiskUtilizationSpaceCutoff(long freeSpaceCutoff) {
setDiskUtilizationSpaceCutoff(freeSpaceCutoff,
freeSpaceCutoff);
}
/**
* Sets the low and high free-space cutoffs, in MB. Negative values are
* treated as 0, and the high cutoff is never stored below the low cutoff.
*
* @param freeSpaceCutoffLow
* free-space cutoff in MB applied to dirs in the good dirs list
* @param freeSpaceCutoffHigh
* free-space cutoff in MB applied to dirs not in the good dirs list
*/
public void setDiskUtilizationSpaceCutoff(long freeSpaceCutoffLow,
    long freeSpaceCutoffHigh) {
  // Clamp each requested value to be non-negative first.
  long clampedLow = freeSpaceCutoffLow < 0 ? 0 : freeSpaceCutoffLow;
  long clampedHigh = freeSpaceCutoffHigh < 0 ? 0 : freeSpaceCutoffHigh;
  diskFreeSpaceCutoffLow = clampedLow;
  // The high cutoff falls back to the low cutoff when it is smaller.
  diskFreeSpaceCutoffHigh = clampedHigh < clampedLow ? clampedLow : clampedHigh;
}
private void setGoodDirsDiskUtilizationPercentage() {

View File

@ -139,23 +139,38 @@ public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
" is not configured properly.");
lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
}
long minFreeSpacePerDiskMB =
long lowMinFreeSpacePerDiskMB =
conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
long highMinFreeSpacePerDiskMB =
conf.getLong(YarnConfiguration.NM_WM_HIGH_PER_DISK_FREE_SPACE_MB,
lowMinFreeSpacePerDiskMB);
if (highMinFreeSpacePerDiskMB < lowMinFreeSpacePerDiskMB) {
LOG.warn("Using " + YarnConfiguration.
NM_MIN_PER_DISK_FREE_SPACE_MB + " as " +
YarnConfiguration.NM_WM_HIGH_PER_DISK_FREE_SPACE_MB +
", because " + YarnConfiguration.
NM_WM_HIGH_PER_DISK_FREE_SPACE_MB +
" is not configured properly.");
highMinFreeSpacePerDiskMB = lowMinFreeSpacePerDiskMB;
}
localDirs =
new DirectoryCollection(
validatePaths(conf
.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
highUsableSpacePercentagePerDisk,
lowUsableSpacePercentagePerDisk,
minFreeSpacePerDiskMB);
lowMinFreeSpacePerDiskMB,
highMinFreeSpacePerDiskMB);
logDirs =
new DirectoryCollection(
validatePaths(conf
.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
highUsableSpacePercentagePerDisk,
lowUsableSpacePercentagePerDisk,
minFreeSpacePerDiskMB);
lowMinFreeSpacePerDiskMB,
highMinFreeSpacePerDiskMB);
String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
conf.set(NM_GOOD_LOCAL_DIRS,

View File

@ -203,12 +203,30 @@ public void testDiskLimitsCutoffSetters() throws IOException {
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
long spaceValue = 57;
dc.setDiskUtilizationSpaceCutoff(spaceValue);
Assert.assertEquals(spaceValue, dc.getDiskUtilizationSpaceCutoff());
spaceValue = -57;
dc.setDiskUtilizationSpaceCutoff(spaceValue);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
long lowSpaceValue = 57;
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue);
Assert.assertEquals(lowSpaceValue, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(lowSpaceValue, dc.getDiskUtilizationSpaceCutoffHigh());
long highSpaceValue = 73;
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue, highSpaceValue);
Assert.assertEquals(lowSpaceValue, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(highSpaceValue, dc.getDiskUtilizationSpaceCutoffHigh());
lowSpaceValue = -57;
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue, highSpaceValue);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(highSpaceValue, dc.getDiskUtilizationSpaceCutoffHigh());
highSpaceValue = -10;
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue, highSpaceValue);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
lowSpaceValue = 33;
dc.setDiskUtilizationSpaceCutoff(lowSpaceValue, highSpaceValue);
Assert.assertEquals(lowSpaceValue, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(lowSpaceValue, dc.getDiskUtilizationSpaceCutoffHigh());
}
@Test
@ -273,42 +291,96 @@ public void testConstructors() {
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57.5F);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57, 73);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(73, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57, 33);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57, -33);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, -57, -33);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, -57, 33);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(33, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoffHigh());
dc = new DirectoryCollection(dirs, 157.5F, 157.5F, 5, 10);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
delta);
Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
delta);
Assert.assertEquals(5, dc.getDiskUtilizationSpaceCutoffLow());
Assert.assertEquals(10, dc.getDiskUtilizationSpaceCutoffHigh());
}
@Test