YARN-3879 [Storage implementation] Create HDFS backing storage implementation for ATS reads. Contributed by Abhishek Modi.

Vrushali C 2018-10-11 21:23:34 -07:00
parent 91f18a5148
commit 7ed627af6b
1 changed file with 50 additions and 37 deletions

org/apache/hadoop/yarn/server/timelineservice/storage/FileSystemTimelineReaderImpl.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.server.timelineservice.storage;
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -38,6 +37,11 @@ import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVRecord;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
@@ -68,7 +72,9 @@ public class FileSystemTimelineReaderImpl extends AbstractService
   private static final Logger LOG =
       LoggerFactory.getLogger(FileSystemTimelineReaderImpl.class);
-  private String rootPath;
+  private FileSystem fs;
+  private Path rootPath;
+  private Path entitiesPath;
   private static final String ENTITIES_DIR = "entities";
 
   /** Default extension for output files. */
@@ -94,7 +100,7 @@ public class FileSystemTimelineReaderImpl extends AbstractService
   @VisibleForTesting
   String getRootPath() {
-    return rootPath;
+    return rootPath.toString();
   }
 
   private static ObjectMapper mapper;
@@ -162,12 +168,12 @@ public class FileSystemTimelineReaderImpl extends AbstractService
     if (clusterId == null || appId == null) {
       throw new IOException("Unable to get flow info");
     }
-    String appFlowMappingFile = rootPath + File.separator + ENTITIES_DIR +
-        File.separator + clusterId + File.separator + APP_FLOW_MAPPING_FILE;
+    Path clusterIdPath = new Path(entitiesPath, clusterId);
+    Path appFlowMappingFilePath = new Path(clusterIdPath,
+        APP_FLOW_MAPPING_FILE);
     try (BufferedReader reader =
              new BufferedReader(new InputStreamReader(
-                 new FileInputStream(
-                     appFlowMappingFile), Charset.forName("UTF-8")));
+                 fs.open(appFlowMappingFilePath), Charset.forName("UTF-8")));
          CSVParser parser = new CSVParser(reader, csvFormat)) {
       for (CSVRecord record : parser.getRecords()) {
         if (record.size() < 4) {
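
Note on the hunk above: string concatenation via File.separator is replaced with hierarchical Path composition, where new Path(parent, child) inherits the parent's URI scheme and authority, so the same code resolves against local disk or HDFS. A minimal standalone sketch of the pattern (the hdfs:// URI, cluster id, and file name below are illustrative assumptions, not taken from the patch):

import org.apache.hadoop.fs.Path;

public class PathCompositionSketch {
  public static void main(String[] args) {
    // Each child segment is appended under its parent, preserving the scheme.
    Path root = new Path("hdfs://namenode:8020/atsv2");       // assumed root URI
    Path entities = new Path(root, "entities");
    Path cluster = new Path(entities, "cluster1");            // assumed cluster id
    Path mapping = new Path(cluster, "app_flow_mapping.csv"); // assumed file name
    System.out.println(mapping);
    // prints: hdfs://namenode:8020/atsv2/entities/cluster1/app_flow_mapping.csv
  }
}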
@@ -266,7 +272,7 @@ public class FileSystemTimelineReaderImpl extends AbstractService
     return entity;
   }
 
-  private Set<TimelineEntity> getEntities(File dir, String entityType,
+  private Set<TimelineEntity> getEntities(Path dir, String entityType,
       TimelineEntityFilters filters, TimelineDataToRetrieve dataToRetrieve)
       throws IOException {
     // First sort the selected entities based on created/start time.
@@ -280,15 +286,18 @@ public class FileSystemTimelineReaderImpl extends AbstractService
         }
     );
     if (dir != null) {
-      File[] files = dir.listFiles();
-      if (files != null) {
-        for (File entityFile : files) {
+      RemoteIterator<LocatedFileStatus> fileStatuses = fs.listFiles(dir,
+          false);
+      if (fileStatuses != null) {
+        while (fileStatuses.hasNext()) {
+          LocatedFileStatus locatedFileStatus = fileStatuses.next();
+          Path entityFile = locatedFileStatus.getPath();
           if (!entityFile.getName()
               .contains(TIMELINE_SERVICE_STORAGE_EXTENSION)) {
             continue;
           }
           try (BufferedReader reader = new BufferedReader(
-              new InputStreamReader(new FileInputStream(entityFile),
+              new InputStreamReader(fs.open(entityFile),
                   Charset.forName("UTF-8")))) {
             TimelineEntity entity = readEntityFromFile(reader);
             if (!entity.getType().equals(entityType)) {
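
Note on the hunk above: dir.listFiles(), which loads the whole directory into a File[] and returns null when the directory is missing, is replaced with FileSystem#listFiles(path, recursive). That call returns a RemoteIterator, which on HDFS can fetch the listing from the NameNode in batches rather than all at once; it signals a missing path with FileNotFoundException rather than null, so the retained null check is defensive. A minimal sketch of the iteration pattern, assuming an already-initialized FileSystem:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListingSketch {
  // Iterate a (possibly large) directory without materializing it.
  static void listEntityFiles(FileSystem fs, Path dir) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false); // non-recursive
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath().getName()
          + " (" + status.getLen() + " bytes)");
    }
  }
}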
@@ -366,25 +375,30 @@ public class FileSystemTimelineReaderImpl extends AbstractService
   @Override
   public void serviceInit(Configuration conf) throws Exception {
-    rootPath = conf.get(TIMELINE_SERVICE_STORAGE_DIR_ROOT,
+    String outputRoot = conf.get(TIMELINE_SERVICE_STORAGE_DIR_ROOT,
         conf.get("hadoop.tmp.dir") + File.separator + STORAGE_DIR_ROOT);
+    rootPath = new Path(outputRoot);
+    entitiesPath = new Path(rootPath, ENTITIES_DIR);
+    fs = rootPath.getFileSystem(conf);
     super.serviceInit(conf);
   }
 
   @Override
   public TimelineEntity getEntity(TimelineReaderContext context,
       TimelineDataToRetrieve dataToRetrieve) throws IOException {
-    String flowRunPath = getFlowRunPath(context.getUserId(),
+    String flowRunPathStr = getFlowRunPath(context.getUserId(),
         context.getClusterId(), context.getFlowName(), context.getFlowRunId(),
         context.getAppId());
-    File dir = new File(new File(rootPath, ENTITIES_DIR),
-        context.getClusterId() + File.separator + flowRunPath + File.separator +
-        context.getAppId() + File.separator + context.getEntityType());
-    File entityFile = new File(
-        dir, context.getEntityId() + TIMELINE_SERVICE_STORAGE_EXTENSION);
+    Path clusterIdPath = new Path(entitiesPath, context.getClusterId());
+    Path flowRunPath = new Path(clusterIdPath, flowRunPathStr);
+    Path appIdPath = new Path(flowRunPath, context.getAppId());
+    Path entityTypePath = new Path(appIdPath, context.getEntityType());
+    Path entityFilePath = new Path(entityTypePath,
+        context.getEntityId() + TIMELINE_SERVICE_STORAGE_EXTENSION);
     try (BufferedReader reader =
              new BufferedReader(new InputStreamReader(
-                 new FileInputStream(entityFile), Charset.forName("UTF-8")))) {
+                 fs.open(entityFilePath), Charset.forName("UTF-8")))) {
       TimelineEntity entity = readEntityFromFile(reader);
       return createEntityToBeReturned(
           entity, dataToRetrieve.getFieldsToRetrieve());
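
Note on serviceInit in the hunk above: the reader now derives its FileSystem from the configured root path, since Path#getFileSystem(Configuration) selects the implementation by the path's URI scheme. That one line is what lets the reader back onto HDFS. A minimal sketch of the wiring, assuming the TIMELINE_SERVICE_STORAGE_DIR_ROOT constant is accessible and using an illustrative namenode URI:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.server.timelineservice.storage.FileSystemTimelineReaderImpl;

public class HdfsRootSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Point the storage root at HDFS instead of the local default
    // under hadoop.tmp.dir.
    conf.set(FileSystemTimelineReaderImpl.TIMELINE_SERVICE_STORAGE_DIR_ROOT,
        "hdfs://namenode:8020/atsv2");            // assumed namenode URI
    Path root = new Path(conf.get(
        FileSystemTimelineReaderImpl.TIMELINE_SERVICE_STORAGE_DIR_ROOT));
    FileSystem fs = root.getFileSystem(conf);     // resolved by URI scheme
    System.out.println(fs.getUri());
    // A bare or file:// root resolves to the local filesystem instead.
  }
}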
@@ -399,32 +413,31 @@ public class FileSystemTimelineReaderImpl extends AbstractService
   public Set<TimelineEntity> getEntities(TimelineReaderContext context,
       TimelineEntityFilters filters, TimelineDataToRetrieve dataToRetrieve)
       throws IOException {
-    String flowRunPath = getFlowRunPath(context.getUserId(),
+    String flowRunPathStr = getFlowRunPath(context.getUserId(),
         context.getClusterId(), context.getFlowName(), context.getFlowRunId(),
         context.getAppId());
-    File dir =
-        new File(new File(rootPath, ENTITIES_DIR),
-            context.getClusterId() + File.separator + flowRunPath +
-                File.separator + context.getAppId() + File.separator +
-                context.getEntityType());
-    return getEntities(dir, context.getEntityType(), filters, dataToRetrieve);
+    Path clusterIdPath = new Path(entitiesPath, context.getClusterId());
+    Path flowRunPath = new Path(clusterIdPath, flowRunPathStr);
+    Path appIdPath = new Path(flowRunPath, context.getAppId());
+    Path entityTypePath = new Path(appIdPath, context.getEntityType());
+    return getEntities(entityTypePath, context.getEntityType(), filters,
+        dataToRetrieve);
   }
 
   @Override public Set<String> getEntityTypes(TimelineReaderContext context)
       throws IOException {
     Set<String> result = new TreeSet<>();
-    String flowRunPath = getFlowRunPath(context.getUserId(),
+    String flowRunPathStr = getFlowRunPath(context.getUserId(),
         context.getClusterId(), context.getFlowName(), context.getFlowRunId(),
         context.getAppId());
-    File dir = new File(new File(rootPath, ENTITIES_DIR),
-        context.getClusterId() + File.separator + flowRunPath
-            + File.separator + context.getAppId());
-    File[] fileList = dir.listFiles();
-    if (fileList != null) {
-      for (File f : fileList) {
-        if (f.isDirectory()) {
-          result.add(f.getName());
-        }
+    Path clusterIdPath = new Path(entitiesPath, context.getClusterId());
+    Path flowRunPath = new Path(clusterIdPath, flowRunPathStr);
+    Path appIdPath = new Path(flowRunPath, context.getAppId());
+    FileStatus[] fileStatuses = fs.listStatus(appIdPath);
+    for (FileStatus fileStatus : fileStatuses) {
+      if (fileStatus.isDirectory()) {
+        result.add(fileStatus.getPath().getName());
       }
     }
     return result;
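
One behavioural note on the last hunk: the old File#listFiles() returned null for a missing application directory and the null check swallowed it, whereas FileSystem#listStatus() throws FileNotFoundException, so a missing directory now surfaces to the caller as an IOException. If the old empty-result behaviour were wanted, a sketch along these lines (a hypothetical helper, not part of the patch) would restore it:

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EntityTypesSketch {
  // Treat a missing application directory as "no entity types yet".
  static Set<String> entityTypesOrEmpty(FileSystem fs, Path appIdPath)
      throws IOException {
    Set<String> result = new TreeSet<>();
    try {
      for (FileStatus status : fs.listStatus(appIdPath)) {
        if (status.isDirectory()) {          // entity types are subdirectories
          result.add(status.getPath().getName());
        }
      }
    } catch (FileNotFoundException e) {
      // nothing written for this application yet; return the empty set
    }
    return result;
  }
}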