YARN-5137. Make DiskChecker pluggable in NodeManager. (Yufei Gu via rchiang)
(cherry picked from commit dbe9e70cc084220ea1f68da850cdb092281b5e96)
This commit is contained in:
parent
01fc975ed9
commit
372f6f8456
|
@ -897,6 +897,10 @@ public class YarnConfiguration extends Configuration {
|
||||||
NM_PREFIX + "resourcemanager.minimum.version";
|
NM_PREFIX + "resourcemanager.minimum.version";
|
||||||
public static final String DEFAULT_NM_RESOURCEMANAGER_MINIMUM_VERSION = "NONE";
|
public static final String DEFAULT_NM_RESOURCEMANAGER_MINIMUM_VERSION = "NONE";
|
||||||
|
|
||||||
|
/** Disk Validator. */
|
||||||
|
public static final String DISK_VALIDATOR = NM_PREFIX + "disk-validator";
|
||||||
|
public static final String DEFAULT_DISK_VALIDATOR = "basic";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum size of container's diagnostics to keep for relaunching container
|
* Maximum size of container's diagnostics to keep for relaunching container
|
||||||
* case.
|
* case.
|
||||||
|
|
|
@ -2892,6 +2892,14 @@
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>
|
<description>
|
||||||
|
The name of the disk validator to use.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.disk-validator</name>
|
||||||
|
<value>basic</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
Enable the CSRF filter for the timeline service web app
|
Enable the CSRF filter for the timeline service web app
|
||||||
</description>
|
</description>
|
||||||
<name>yarn.timeline-service.webapp.rest-csrf.enabled</name>
|
<name>yarn.timeline-service.webapp.rest-csrf.enabled</name>
|
||||||
|
|
|
@ -42,7 +42,11 @@ import org.apache.hadoop.fs.FileAlreadyExistsException;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.util.DiskChecker;
|
import org.apache.hadoop.util.DiskValidator;
|
||||||
|
import org.apache.hadoop.util.DiskValidatorFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
@ -52,6 +56,8 @@ import com.google.common.annotations.VisibleForTesting;
|
||||||
public class DirectoryCollection {
|
public class DirectoryCollection {
|
||||||
private static final Log LOG = LogFactory.getLog(DirectoryCollection.class);
|
private static final Log LOG = LogFactory.getLog(DirectoryCollection.class);
|
||||||
|
|
||||||
|
private final Configuration conf;
|
||||||
|
private final DiskValidator diskValidator;
|
||||||
/**
|
/**
|
||||||
* The enum defines disk failure type.
|
* The enum defines disk failure type.
|
||||||
*/
|
*/
|
||||||
|
@ -172,6 +178,16 @@ public class DirectoryCollection {
|
||||||
float utilizationPercentageCutOffHigh,
|
float utilizationPercentageCutOffHigh,
|
||||||
float utilizationPercentageCutOffLow,
|
float utilizationPercentageCutOffLow,
|
||||||
long utilizationSpaceCutOff) {
|
long utilizationSpaceCutOff) {
|
||||||
|
conf = new YarnConfiguration();
|
||||||
|
try {
|
||||||
|
diskValidator = DiskValidatorFactory.getInstance(
|
||||||
|
conf.get(YarnConfiguration.DISK_VALIDATOR));
|
||||||
|
LOG.info("Disk Validator: " + YarnConfiguration.DISK_VALIDATOR +
|
||||||
|
" is loaded.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new YarnRuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
localDirs = new CopyOnWriteArrayList<>(dirs);
|
localDirs = new CopyOnWriteArrayList<>(dirs);
|
||||||
errorDirs = new CopyOnWriteArrayList<>();
|
errorDirs = new CopyOnWriteArrayList<>();
|
||||||
fullDirs = new CopyOnWriteArrayList<>();
|
fullDirs = new CopyOnWriteArrayList<>();
|
||||||
|
@ -395,7 +411,7 @@ public class DirectoryCollection {
|
||||||
String msg;
|
String msg;
|
||||||
try {
|
try {
|
||||||
File testDir = new File(dir);
|
File testDir = new File(dir);
|
||||||
DiskChecker.checkDir(testDir);
|
diskValidator.checkStatus(testDir);
|
||||||
float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
|
float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
|
||||||
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
||||||
if (isDiskUsageOverPercentageLimit(testDir,
|
if (isDiskUsageOverPercentageLimit(testDir,
|
||||||
|
@ -445,7 +461,7 @@ public class DirectoryCollection {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
DiskChecker.checkDir(target);
|
diskValidator.checkStatus(target);
|
||||||
} finally {
|
} finally {
|
||||||
FileUtils.deleteQuietly(target);
|
FileUtils.deleteQuietly(target);
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,8 @@ import org.apache.hadoop.security.Credentials;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
import org.apache.hadoop.util.DiskChecker;
|
import org.apache.hadoop.util.DiskValidator;
|
||||||
|
import org.apache.hadoop.util.DiskValidatorFactory;
|
||||||
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.records.LocalResource;
|
import org.apache.hadoop.yarn.api.records.LocalResource;
|
||||||
|
@ -69,7 +70,6 @@ import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHe
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
|
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
|
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
|
||||||
import org.apache.hadoop.yarn.util.FSDownload;
|
import org.apache.hadoop.yarn.util.FSDownload;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -99,6 +99,7 @@ public class ContainerLocalizer {
|
||||||
private final RecordFactory recordFactory;
|
private final RecordFactory recordFactory;
|
||||||
private final Map<LocalResource,Future<Path>> pendingResources;
|
private final Map<LocalResource,Future<Path>> pendingResources;
|
||||||
private final String appCacheDirContextName;
|
private final String appCacheDirContextName;
|
||||||
|
private final DiskValidator diskValidator;
|
||||||
|
|
||||||
public ContainerLocalizer(FileContext lfs, String user, String appId,
|
public ContainerLocalizer(FileContext lfs, String user, String appId,
|
||||||
String localizerId, List<Path> localDirs,
|
String localizerId, List<Path> localDirs,
|
||||||
|
@ -115,7 +116,11 @@ public class ContainerLocalizer {
|
||||||
this.localDirs = localDirs;
|
this.localDirs = localDirs;
|
||||||
this.localizerId = localizerId;
|
this.localizerId = localizerId;
|
||||||
this.recordFactory = recordFactory;
|
this.recordFactory = recordFactory;
|
||||||
this.conf = new Configuration();
|
this.conf = new YarnConfiguration();
|
||||||
|
this.diskValidator = DiskValidatorFactory.getInstance(
|
||||||
|
conf.get(YarnConfiguration.DISK_VALIDATOR));
|
||||||
|
LOG.info("Disk Validator: " + YarnConfiguration.DISK_VALIDATOR +
|
||||||
|
" is loaded.");
|
||||||
this.appCacheDirContextName = String.format(APPCACHE_CTXT_FMT, appId);
|
this.appCacheDirContextName = String.format(APPCACHE_CTXT_FMT, appId);
|
||||||
this.pendingResources = new HashMap<LocalResource,Future<Path>>();
|
this.pendingResources = new HashMap<LocalResource,Future<Path>>();
|
||||||
}
|
}
|
||||||
|
@ -199,7 +204,7 @@ public class ContainerLocalizer {
|
||||||
|
|
||||||
Callable<Path> download(Path path, LocalResource rsrc,
|
Callable<Path> download(Path path, LocalResource rsrc,
|
||||||
UserGroupInformation ugi) throws IOException {
|
UserGroupInformation ugi) throws IOException {
|
||||||
DiskChecker.checkDir(new File(path.toUri().getRawPath()));
|
diskValidator.checkStatus(new File(path.toUri().getRawPath()));
|
||||||
return new FSDownload(lfs, ugi, conf, path, rsrc);
|
return new FSDownload(lfs, ugi, conf, path, rsrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -72,6 +72,8 @@ import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
import org.apache.hadoop.service.CompositeService;
|
import org.apache.hadoop.service.CompositeService;
|
||||||
import org.apache.hadoop.util.DiskChecker;
|
import org.apache.hadoop.util.DiskChecker;
|
||||||
|
import org.apache.hadoop.util.DiskValidator;
|
||||||
|
import org.apache.hadoop.util.DiskValidatorFactory;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
||||||
import org.apache.hadoop.util.concurrent.HadoopScheduledThreadPoolExecutor;
|
import org.apache.hadoop.util.concurrent.HadoopScheduledThreadPoolExecutor;
|
||||||
|
@ -168,6 +170,7 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
private DirsChangeListener localDirsChangeListener;
|
private DirsChangeListener localDirsChangeListener;
|
||||||
private DirsChangeListener logDirsChangeListener;
|
private DirsChangeListener logDirsChangeListener;
|
||||||
private Context nmContext;
|
private Context nmContext;
|
||||||
|
private DiskValidator diskValidator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Map of LocalResourceTrackers keyed by username, for private
|
* Map of LocalResourceTrackers keyed by username, for private
|
||||||
|
@ -247,6 +250,10 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
"Failed to initialize LocalizationService", e);
|
"Failed to initialize LocalizationService", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diskValidator = DiskValidatorFactory.getInstance(
|
||||||
|
conf.get(YarnConfiguration.DISK_VALIDATOR));
|
||||||
|
LOG.info("Disk Validator: " + YarnConfiguration.DISK_VALIDATOR +
|
||||||
|
" is loaded.");
|
||||||
cacheTargetSize =
|
cacheTargetSize =
|
||||||
conf.getLong(YarnConfiguration.NM_LOCALIZER_CACHE_TARGET_SIZE_MB, YarnConfiguration.DEFAULT_NM_LOCALIZER_CACHE_TARGET_SIZE_MB) << 20;
|
conf.getLong(YarnConfiguration.NM_LOCALIZER_CACHE_TARGET_SIZE_MB, YarnConfiguration.DEFAULT_NM_LOCALIZER_CACHE_TARGET_SIZE_MB) << 20;
|
||||||
cacheCleanupPeriod =
|
cacheCleanupPeriod =
|
||||||
|
@ -835,7 +842,13 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
publicRsrc.getPathForLocalization(key, publicRootPath,
|
publicRsrc.getPathForLocalization(key, publicRootPath,
|
||||||
delService);
|
delService);
|
||||||
if (!publicDirDestPath.getParent().equals(publicRootPath)) {
|
if (!publicDirDestPath.getParent().equals(publicRootPath)) {
|
||||||
DiskChecker.checkDir(new File(publicDirDestPath.toUri().getPath()));
|
if (diskValidator != null) {
|
||||||
|
diskValidator.checkStatus(
|
||||||
|
new File(publicDirDestPath.toUri().getPath()));
|
||||||
|
} else {
|
||||||
|
throw new DiskChecker.DiskErrorException(
|
||||||
|
"Disk Validator is null!");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// explicitly synchronize pending here to avoid future task
|
// explicitly synchronize pending here to avoid future task
|
||||||
|
|
Loading…
Reference in New Issue