YARN-5314. Fixed a ConcurrentModificationException in ATS v1.5 EntityGroupFSTimelineStore. Contributed by Li Lu.
(cherry picked from commit 673e5e02fe
)
This commit is contained in:
parent
5522a46f4c
commit
e4a62a233d
|
@ -47,7 +47,6 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.VersionInfo;
|
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineDomains;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineDomains;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
|
||||||
|
@ -61,7 +60,6 @@ import org.apache.hadoop.yarn.server.timeline.NameValuePair;
|
||||||
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
|
import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
|
||||||
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
|
import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
||||||
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
|
||||||
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
||||||
import org.apache.hadoop.yarn.webapp.BadRequestException;
|
import org.apache.hadoop.yarn.webapp.BadRequestException;
|
||||||
import org.apache.hadoop.yarn.webapp.ForbiddenException;
|
import org.apache.hadoop.yarn.webapp.ForbiddenException;
|
||||||
|
|
|
@ -19,19 +19,13 @@ package org.apache.hadoop.yarn.server.timeline;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntityGroupId;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineEntityGroupId;
|
||||||
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
|
import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
|
||||||
import org.codehaus.jackson.JsonFactory;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -47,15 +41,12 @@ public class EntityCacheItem {
|
||||||
private EntityGroupFSTimelineStore.AppLogs appLogs;
|
private EntityGroupFSTimelineStore.AppLogs appLogs;
|
||||||
private long lastRefresh;
|
private long lastRefresh;
|
||||||
private Configuration config;
|
private Configuration config;
|
||||||
private FileSystem fs;
|
|
||||||
private int refCount = 0;
|
private int refCount = 0;
|
||||||
private static AtomicInteger activeStores = new AtomicInteger(0);
|
private static AtomicInteger activeStores = new AtomicInteger(0);
|
||||||
|
|
||||||
public EntityCacheItem(TimelineEntityGroupId gId, Configuration config,
|
public EntityCacheItem(TimelineEntityGroupId gId, Configuration config) {
|
||||||
FileSystem fs) {
|
|
||||||
this.groupId = gId;
|
this.groupId = gId;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.fs = fs;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,15 +88,12 @@ public class EntityCacheItem {
|
||||||
* other operations on the same cache item.
|
* other operations on the same cache item.
|
||||||
*
|
*
|
||||||
* @param aclManager ACL manager for the timeline storage
|
* @param aclManager ACL manager for the timeline storage
|
||||||
* @param jsonFactory JSON factory for the storage
|
|
||||||
* @param objMapper Object mapper for the storage
|
|
||||||
* @param metrics Metrics to trace the status of the entity group store
|
* @param metrics Metrics to trace the status of the entity group store
|
||||||
* @return a {@link org.apache.hadoop.yarn.server.timeline.TimelineStore}
|
* @return a {@link org.apache.hadoop.yarn.server.timeline.TimelineStore}
|
||||||
* object filled with all entities in the group.
|
* object filled with all entities in the group.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public synchronized TimelineStore refreshCache(TimelineACLsManager aclManager,
|
public synchronized TimelineStore refreshCache(TimelineACLsManager aclManager,
|
||||||
JsonFactory jsonFactory, ObjectMapper objMapper,
|
|
||||||
EntityGroupFSTimelineStoreMetrics metrics) throws IOException {
|
EntityGroupFSTimelineStoreMetrics metrics) throws IOException {
|
||||||
if (needRefresh()) {
|
if (needRefresh()) {
|
||||||
long startTime = Time.monotonicNow();
|
long startTime = Time.monotonicNow();
|
||||||
|
@ -128,31 +116,14 @@ public class EntityCacheItem {
|
||||||
// Store is not null, the refresh is triggered by stale storage.
|
// Store is not null, the refresh is triggered by stale storage.
|
||||||
metrics.incrCacheStaleRefreshes();
|
metrics.incrCacheStaleRefreshes();
|
||||||
}
|
}
|
||||||
List<LogInfo> removeList = new ArrayList<>();
|
|
||||||
try (TimelineDataManager tdm =
|
try (TimelineDataManager tdm =
|
||||||
new TimelineDataManager(store, aclManager)) {
|
new TimelineDataManager(store, aclManager)) {
|
||||||
tdm.init(config);
|
tdm.init(config);
|
||||||
tdm.start();
|
tdm.start();
|
||||||
for (LogInfo log : appLogs.getDetailLogs()) {
|
// Load data from appLogs to tdm
|
||||||
LOG.debug("Try refresh logs for {}", log.getFilename());
|
appLogs.loadDetailLog(tdm, groupId);
|
||||||
// Only refresh the log that matches the cache id
|
|
||||||
if (log.matchesGroupId(groupId)) {
|
|
||||||
Path appDirPath = appLogs.getAppDirPath();
|
|
||||||
if (fs.exists(log.getPath(appDirPath))) {
|
|
||||||
LOG.debug("Refresh logs for cache id {}", groupId);
|
|
||||||
log.parseForStore(tdm, appDirPath, appLogs.isDone(),
|
|
||||||
jsonFactory, objMapper, fs);
|
|
||||||
} else {
|
|
||||||
// The log may have been removed, remove the log
|
|
||||||
removeList.add(log);
|
|
||||||
LOG.info("File {} no longer exists, removing it from log list",
|
|
||||||
log.getPath(appDirPath));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
appLogs.getDetailLogs().removeAll(removeList);
|
|
||||||
}
|
|
||||||
updateRefreshTimeToNow();
|
updateRefreshTimeToNow();
|
||||||
metrics.addCacheRefreshTime(Time.monotonicNow() - startTime);
|
metrics.addCacheRefreshTime(Time.monotonicNow() - startTime);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -716,8 +716,8 @@ public class EntityGroupFSTimelineStore extends CompositeService
|
||||||
summaryLogs.add(log);
|
summaryLogs.add(log);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addDetailLog(String attemptDirName, String filename,
|
private synchronized void addDetailLog(String attemptDirName,
|
||||||
String owner) {
|
String filename, String owner) {
|
||||||
for (LogInfo log : detailLogs) {
|
for (LogInfo log : detailLogs) {
|
||||||
if (log.getFilename().equals(filename)
|
if (log.getFilename().equals(filename)
|
||||||
&& log.getAttemptDirName().equals(attemptDirName)) {
|
&& log.getAttemptDirName().equals(attemptDirName)) {
|
||||||
|
@ -727,6 +727,30 @@ public class EntityGroupFSTimelineStore extends CompositeService
|
||||||
detailLogs.add(new EntityLogInfo(attemptDirName, filename, owner));
|
detailLogs.add(new EntityLogInfo(attemptDirName, filename, owner));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
synchronized void loadDetailLog(TimelineDataManager tdm,
|
||||||
|
TimelineEntityGroupId groupId) throws IOException {
|
||||||
|
List<LogInfo> removeList = new ArrayList<>();
|
||||||
|
for (LogInfo log : detailLogs) {
|
||||||
|
LOG.debug("Try refresh logs for {}", log.getFilename());
|
||||||
|
// Only refresh the log that matches the cache id
|
||||||
|
if (log.matchesGroupId(groupId)) {
|
||||||
|
Path dirPath = getAppDirPath();
|
||||||
|
if (fs.exists(log.getPath(dirPath))) {
|
||||||
|
LOG.debug("Refresh logs for cache id {}", groupId);
|
||||||
|
log.parseForStore(tdm, dirPath, isDone(),
|
||||||
|
jsonFactory, objMapper, fs);
|
||||||
|
} else {
|
||||||
|
// The log may have been removed, remove the log
|
||||||
|
removeList.add(log);
|
||||||
|
LOG.info(
|
||||||
|
"File {} no longer exists, removing it from log list",
|
||||||
|
log.getPath(dirPath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
detailLogs.removeAll(removeList);
|
||||||
|
}
|
||||||
|
|
||||||
public synchronized void moveToDone() throws IOException {
|
public synchronized void moveToDone() throws IOException {
|
||||||
Path doneAppPath = getDoneAppPath(appId);
|
Path doneAppPath = getDoneAppPath(appId);
|
||||||
if (!doneAppPath.equals(appDirPath)) {
|
if (!doneAppPath.equals(appDirPath)) {
|
||||||
|
@ -915,7 +939,7 @@ public class EntityGroupFSTimelineStore extends CompositeService
|
||||||
cacheItem = this.cachedLogs.get(groupId);
|
cacheItem = this.cachedLogs.get(groupId);
|
||||||
if (cacheItem == null) {
|
if (cacheItem == null) {
|
||||||
LOG.debug("Set up new cache item for id {}", groupId);
|
LOG.debug("Set up new cache item for id {}", groupId);
|
||||||
cacheItem = new EntityCacheItem(groupId, getConfig(), fs);
|
cacheItem = new EntityCacheItem(groupId, getConfig());
|
||||||
AppLogs appLogs = getAndSetAppLogs(groupId.getApplicationId());
|
AppLogs appLogs = getAndSetAppLogs(groupId.getApplicationId());
|
||||||
if (appLogs != null) {
|
if (appLogs != null) {
|
||||||
LOG.debug("Set applogs {} for group id {}", appLogs, groupId);
|
LOG.debug("Set applogs {} for group id {}", appLogs, groupId);
|
||||||
|
@ -935,8 +959,7 @@ public class EntityGroupFSTimelineStore extends CompositeService
|
||||||
// Add the reference by the store
|
// Add the reference by the store
|
||||||
cacheItem.incrRefs();
|
cacheItem.incrRefs();
|
||||||
cacheItems.add(cacheItem);
|
cacheItems.add(cacheItem);
|
||||||
store = cacheItem.refreshCache(aclManager, jsonFactory, objMapper,
|
store = cacheItem.refreshCache(aclManager, metrics);
|
||||||
metrics);
|
|
||||||
} else {
|
} else {
|
||||||
LOG.warn("AppLogs for group id {} is null", groupId);
|
LOG.warn("AppLogs for group id {} is null", groupId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -308,7 +308,7 @@ public class TestEntityGroupFSTimelineStore extends TimelineStoreTestUtils {
|
||||||
AppState.COMPLETED);
|
AppState.COMPLETED);
|
||||||
EntityCacheItem cacheItem = new EntityCacheItem(
|
EntityCacheItem cacheItem = new EntityCacheItem(
|
||||||
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
||||||
config, fs);
|
config);
|
||||||
cacheItem.setAppLogs(appLogs);
|
cacheItem.setAppLogs(appLogs);
|
||||||
store.setCachedLogs(
|
store.setCachedLogs(
|
||||||
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
||||||
|
@ -360,7 +360,7 @@ public class TestEntityGroupFSTimelineStore extends TimelineStoreTestUtils {
|
||||||
AppState.COMPLETED);
|
AppState.COMPLETED);
|
||||||
final EntityCacheItem cacheItem = new EntityCacheItem(
|
final EntityCacheItem cacheItem = new EntityCacheItem(
|
||||||
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
EntityGroupPlugInForTest.getStandardTimelineGroupId(mainTestAppId),
|
||||||
config, fs);
|
config);
|
||||||
|
|
||||||
cacheItem.setAppLogs(appLogs);
|
cacheItem.setAppLogs(appLogs);
|
||||||
store.setCachedLogs(
|
store.setCachedLogs(
|
||||||
|
@ -396,7 +396,7 @@ public class TestEntityGroupFSTimelineStore extends TimelineStoreTestUtils {
|
||||||
AppState.COMPLETED);
|
AppState.COMPLETED);
|
||||||
EntityCacheItem item = new EntityCacheItem(
|
EntityCacheItem item = new EntityCacheItem(
|
||||||
EntityGroupPlugInForTest.getStandardTimelineGroupId(appId),
|
EntityGroupPlugInForTest.getStandardTimelineGroupId(appId),
|
||||||
config, fs);
|
config);
|
||||||
item.setAppLogs(currAppLog);
|
item.setAppLogs(currAppLog);
|
||||||
store.setCachedLogs(
|
store.setCachedLogs(
|
||||||
EntityGroupPlugInForTest.getStandardTimelineGroupId(appId),
|
EntityGroupPlugInForTest.getStandardTimelineGroupId(appId),
|
||||||
|
|
Loading…
Reference in New Issue