YARN-9545. Create healthcheck REST endpoint for ATSv2. Contributed by Zoltan Siegl.
This commit is contained in:
parent
f1ead03672
commit
72203f7a12
|
@ -0,0 +1,82 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.yarn.api.records.timeline;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
|
||||||
|
import javax.xml.bind.annotation.XmlAccessType;
|
||||||
|
import javax.xml.bind.annotation.XmlAccessorType;
|
||||||
|
import javax.xml.bind.annotation.XmlElement;
|
||||||
|
import javax.xml.bind.annotation.XmlRootElement;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class holds health information for ATS.
|
||||||
|
*/
|
||||||
|
@XmlRootElement(name = "health")
|
||||||
|
@XmlAccessorType(XmlAccessType.NONE)
|
||||||
|
@InterfaceAudience.Public
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public class TimelineHealth {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timline health status.
|
||||||
|
*
|
||||||
|
* RUNNING - Service is up and running
|
||||||
|
* READER_CONNECTION_FAULURE - isConnectionAlive() of reader implementation
|
||||||
|
* reported an error
|
||||||
|
*/
|
||||||
|
public enum TimelineHealthStatus {
|
||||||
|
RUNNING,
|
||||||
|
READER_CONNECTION_FAILURE
|
||||||
|
}
|
||||||
|
|
||||||
|
private TimelineHealthStatus healthStatus;
|
||||||
|
private String diagnosticsInfo;
|
||||||
|
|
||||||
|
public TimelineHealth(TimelineHealthStatus healthy, String diagnosticsInfo) {
|
||||||
|
this.healthStatus = healthy;
|
||||||
|
this.diagnosticsInfo = diagnosticsInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
public TimelineHealth() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@XmlElement(name = "healthStatus")
|
||||||
|
public TimelineHealthStatus getHealthStatus() {
|
||||||
|
return healthStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
@XmlElement(name = "diagnosticsInfo")
|
||||||
|
public String getDiagnosticsInfo() {
|
||||||
|
return diagnosticsInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void setHealthStatus(TimelineHealthStatus healthStatus) {
|
||||||
|
this.healthStatus = healthStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDiagnosticsInfo(String diagnosticsInfo) {
|
||||||
|
this.diagnosticsInfo = diagnosticsInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hbase.client.Connection;
|
import org.apache.hadoop.hbase.client.Connection;
|
||||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
@ -158,6 +159,18 @@ public class HBaseTimelineReaderImpl
|
||||||
return reader.readEntityTypes(hbaseConf, conn);
|
return reader.readEntityTypes(hbaseConf, conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimelineHealth getHealthStatus() {
|
||||||
|
if (!this.isHBaseDown()) {
|
||||||
|
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||||
|
"");
|
||||||
|
} else {
|
||||||
|
return new TimelineHealth(
|
||||||
|
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||||
|
"HBase connection is down");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected static final TimelineEntityFilters MONITOR_FILTERS =
|
protected static final TimelineEntityFilters MONITOR_FILTERS =
|
||||||
new TimelineEntityFilters.Builder().entityLimit(1L).build();
|
new TimelineEntityFilters.Builder().entityLimit(1L).build();
|
||||||
protected static final TimelineDataToRetrieve DATA_TO_RETRIEVE =
|
protected static final TimelineDataToRetrieve DATA_TO_RETRIEVE =
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowRunEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.FlowRunEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
|
@ -219,4 +220,13 @@ public class TimelineReaderManager extends AbstractService {
|
||||||
}
|
}
|
||||||
return callerUGI != null && adminACLsManager.isAdmin(callerUGI);
|
return callerUGI != null && adminACLsManager.isAdmin(callerUGI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the health status reported by the underlying timeline reader.
|
||||||
|
*
|
||||||
|
* @return the {@link TimelineHealth} reported by the timeline reader.
|
||||||
|
*/
|
||||||
|
public TimelineHealth getHealthStatus() {
|
||||||
|
return reader.getHealthStatus();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.hadoop.http.JettyUtils;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||||
|
@ -218,6 +219,38 @@ public class TimelineReaderWebServices {
|
||||||
return TimelineUtils.createTimelineAbout("Timeline Reader API");
|
return TimelineUtils.createTimelineAbout("Timeline Reader API");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Health check REST end point.
|
||||||
|
*
|
||||||
|
* @param req Servlet request.
|
||||||
|
* @param res Servlet response.
|
||||||
|
*
|
||||||
|
* @return A {@link Response} object with HTTP status 200 OK if the service
|
||||||
|
* is running.
|
||||||
|
* Otherwise, a {@link Response} object with HTTP status 500 is
|
||||||
|
* returned.
|
||||||
|
*/
|
||||||
|
@GET
|
||||||
|
@Path("/health")
|
||||||
|
@Produces(MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8)
|
||||||
|
public Response health(
|
||||||
|
@Context HttpServletRequest req,
|
||||||
|
@Context HttpServletResponse res
|
||||||
|
) {
|
||||||
|
Response response;
|
||||||
|
TimelineHealth timelineHealth = this.getTimelineReaderManager().getHealthStatus();
|
||||||
|
if (timelineHealth.getHealthStatus()
|
||||||
|
.equals(TimelineHealth.TimelineHealthStatus.RUNNING)) {
|
||||||
|
response = Response.ok(timelineHealth).build();
|
||||||
|
} else {
|
||||||
|
LOG.info("Timeline services health check: timeline reader reported " +
|
||||||
|
"connection failure");
|
||||||
|
response = Response.serverError().entity(timelineHealth).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a single entity for a given entity type and UID which is a delimited
|
* Return a single entity for a given entity type and UID which is a delimited
|
||||||
* string containing clusterid, userid, flow name, flowrun id and app id.
|
* string containing clusterid, userid, flow name, flowrun id and app id.
|
||||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.commons.csv.CSVParser;
|
||||||
import org.apache.commons.csv.CSVRecord;
|
import org.apache.commons.csv.CSVRecord;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
|
||||||
|
@ -429,4 +430,26 @@ public class FileSystemTimelineReaderImpl extends AbstractService
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimelineHealth getHealthStatus() {
|
||||||
|
try {
|
||||||
|
File file = new File(rootPath);
|
||||||
|
if (file.exists()) {
|
||||||
|
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||||
|
"");
|
||||||
|
} else {
|
||||||
|
return new TimelineHealth(
|
||||||
|
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||||
|
"Root path \"" + rootPath + "\" does not exist"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
return new TimelineHealth(
|
||||||
|
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||||
|
e.getMessage()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -24,6 +24,7 @@ import java.util.Set;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.service.Service;
|
import org.apache.hadoop.service.Service;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
|
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
|
||||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
|
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
|
||||||
|
@ -192,4 +193,11 @@ public interface TimelineReader extends Service {
|
||||||
* storage.
|
* storage.
|
||||||
*/
|
*/
|
||||||
Set<String> getEntityTypes(TimelineReaderContext context) throws IOException;
|
Set<String> getEntityTypes(TimelineReaderContext context) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if reader connection is working properly.
|
||||||
|
*
|
||||||
|
* @return a {@link TimelineHealth} carrying the reader's health status and
* diagnostics.
|
||||||
|
*/
|
||||||
|
TimelineHealth getHealthStatus();
|
||||||
}
|
}
|
|
@ -37,6 +37,7 @@ import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.http.JettyUtils;
|
import org.apache.hadoop.http.JettyUtils;
|
||||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
||||||
|
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
@ -777,4 +778,22 @@ public class TestTimelineReaderWebServices {
|
||||||
client.destroy();
|
client.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHealthCheck() throws Exception {
|
||||||
|
Client client = createClient();
|
||||||
|
try {
|
||||||
|
URI uri = URI.create("http://localhost:" + serverPort + "/ws/v2/"
|
||||||
|
+ "timeline/health");
|
||||||
|
ClientResponse resp = getResponse(client, uri);
|
||||||
|
TimelineHealth timelineHealth =
|
||||||
|
resp.getEntity(new GenericType<TimelineHealth>() {
|
||||||
|
});
|
||||||
|
assertEquals(200, resp.getStatus());
|
||||||
|
assertEquals(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||||
|
timelineHealth.getHealthStatus());
|
||||||
|
} finally {
|
||||||
|
client.destroy();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue