YARN-9545. Create healthcheck REST endpoint for ATSv2. Contributed by Zoltan Siegl.

(cherry picked from commit 72203f7a12)
This commit is contained in:
Sunil G 2019-06-12 19:23:40 +05:30
parent 1bb9e9a4f2
commit bc028d3ebb
7 changed files with 188 additions and 0 deletions

View File

@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.api.records.timeline;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
/**
 * Health report for the timeline service (ATS): a status value together
 * with free-form diagnostics text. Bound to the XML root element
 * {@code health}.
 */
@XmlRootElement(name = "health")
@XmlAccessorType(XmlAccessType.NONE)
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class TimelineHealth {

  /**
   * Timeline health status.
   *
   * RUNNING - Service is up and running
   * READER_CONNECTION_FAILURE - the timeline reader implementation reported
   *                             a problem with its connection
   */
  public enum TimelineHealthStatus {
    RUNNING,
    READER_CONNECTION_FAILURE
  }

  /** Current status; stays {@code null} until set. */
  private TimelineHealthStatus healthStatus;

  /** Human-readable details accompanying the status. */
  private String diagnosticsInfo;

  /**
   * Creates a health report with both values supplied up front.
   *
   * @param healthStatus status to report.
   * @param diagnosticsInfo diagnostics text to report.
   */
  public TimelineHealth(TimelineHealthStatus healthStatus,
      String diagnosticsInfo) {
    this.healthStatus = healthStatus;
    this.diagnosticsInfo = diagnosticsInfo;
  }

  /**
   * Creates an empty report; status and diagnostics are left unset.
   */
  public TimelineHealth() {
  }

  /**
   * @return the reported status, serialized as element {@code healthStatus}.
   */
  @XmlElement(name = "healthStatus")
  public TimelineHealthStatus getHealthStatus() {
    return this.healthStatus;
  }

  /**
   * @return the diagnostics text, serialized as element
   *         {@code diagnosticsInfo}.
   */
  @XmlElement(name = "diagnosticsInfo")
  public String getDiagnosticsInfo() {
    return this.diagnosticsInfo;
  }

  /**
   * @param healthStatus new status value.
   */
  public void setHealthStatus(TimelineHealthStatus healthStatus) {
    this.healthStatus = healthStatus;
  }

  /**
   * @param diagnosticsInfo new diagnostics text.
   */
  public void setDiagnosticsInfo(String diagnosticsInfo) {
    this.diagnosticsInfo = diagnosticsInfo;
  }
}

View File

@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@ -158,6 +159,18 @@ public class HBaseTimelineReaderImpl
return reader.readEntityTypes(hbaseConf, conn);
}
/**
 * Reports reader health based on {@code isHBaseDown()}.
 *
 * @return READER_CONNECTION_FAILURE with a fixed diagnostics message when
 *         {@code isHBaseDown()} returns true; RUNNING with empty
 *         diagnostics otherwise.
 */
@Override
public TimelineHealth getHealthStatus() {
  // Handle the failure case first so the healthy path is the fall-through.
  if (this.isHBaseDown()) {
    return new TimelineHealth(
        TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
        "HBase connection is down");
  }
  return new TimelineHealth(
      TimelineHealth.TimelineHealthStatus.RUNNING, "");
}
/**
 * Entity filters built with an entity limit of 1.
 * NOTE(review): the name suggests these are used for the HBase monitoring
 * query - confirm against the usage site.
 */
protected static final TimelineEntityFilters MONITOR_FILTERS =
new TimelineEntityFilters.Builder().entityLimit(1L).build();
protected static final TimelineDataToRetrieve DATA_TO_RETRIEVE =

View File

@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.FlowRunEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
@ -219,4 +220,13 @@ public class TimelineReaderManager extends AbstractService {
}
return callerUGI != null && adminACLsManager.isAdmin(callerUGI);
}
/**
 * Get the health status of the timeline reader.
 *
 * Delegates directly to the underlying reader's {@code getHealthStatus()}.
 *
 * @return the {@link TimelineHealth} object returned by the reader.
 */
public TimelineHealth getHealthStatus() {
return reader.getHealthStatus();
}
}

View File

@ -48,6 +48,7 @@ import org.apache.hadoop.http.JettyUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
@ -215,6 +216,38 @@ public class TimelineReaderWebServices {
return TimelineUtils.createTimelineAbout("Timeline Reader API");
}
/**
 * Health check REST end point.
 *
 * @param req Servlet request (not read by this method).
 * @param res Servlet response (not read by this method).
 *
 * @return A {@link Response} object with HTTP status 200 OK if the service
 *         is running.
 *         Otherwise, a {@link Response} object with HTTP status 500 is
 *         returned. In both cases the {@link TimelineHealth} report
 *         obtained from the reader manager is the response entity.
 */
@GET
@Path("/health")
@Produces(MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8)
public Response health(
    @Context HttpServletRequest req,
    @Context HttpServletResponse res
) {
  TimelineHealth timelineHealth =
      this.getTimelineReaderManager().getHealthStatus();
  // Compare the enum with == rather than equals(): a report whose status
  // was never set (null) is then treated as unhealthy instead of raising
  // a NullPointerException inside the health check itself.
  if (TimelineHealth.TimelineHealthStatus.RUNNING
      == timelineHealth.getHealthStatus()) {
    return Response.ok(timelineHealth).build();
  }
  LOG.info("Timeline services health check: timeline reader reported " +
      "connection failure");
  return Response.serverError().entity(timelineHealth).build();
}
/**
* Return a single entity for a given entity type and UID which is a delimited
* string containing clusterid, userid, flow name, flowrun id and app id.

View File

@ -42,6 +42,7 @@ import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
@ -429,4 +430,26 @@ public class FileSystemTimelineReaderImpl extends AbstractService
}
return result;
}
/**
 * Reports reader health by checking that the storage root exists.
 *
 * @return RUNNING with empty diagnostics when {@code rootPath} exists;
 *         otherwise READER_CONNECTION_FAILURE, with diagnostics naming
 *         either the missing path or the exception that was raised while
 *         checking it.
 */
@Override
public TimelineHealth getHealthStatus() {
  try {
    if (new File(rootPath).exists()) {
      return new TimelineHealth(
          TimelineHealth.TimelineHealthStatus.RUNNING, "");
    }
    return new TimelineHealth(
        TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
        "Root path \"" + rootPath + "\" does not exist");
  } catch (Exception e) {
    // Some exceptions carry no message (e.g. the NullPointerException
    // raised by File for a null path). Fall back to toString() so the
    // diagnostics always say what went wrong instead of being null.
    String diagnostics =
        e.getMessage() != null ? e.getMessage() : e.toString();
    return new TimelineHealth(
        TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
        diagnostics);
  }
}
}

View File

@ -24,6 +24,7 @@ import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
@ -192,4 +193,11 @@ public interface TimelineReader extends Service {
* storage.
*/
Set<String> getEntityTypes(TimelineReaderContext context) throws IOException;
/**
 * Check if reader connection is working properly.
 *
 * @return a {@link TimelineHealth} object describing the state of the
 *         reader connection.
 */
TimelineHealth getHealthStatus();
}

View File

@ -37,6 +37,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.JettyUtils;
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@ -777,4 +778,22 @@ public class TestTimelineReaderWebServices {
client.destroy();
}
}
/**
 * Calls the timeline reader health end point and expects HTTP 200 with a
 * RUNNING health status in the returned entity.
 */
@Test
public void testHealthCheck() throws Exception {
  final Client client = createClient();
  try {
    final URI healthUri = URI.create(
        "http://localhost:" + serverPort + "/ws/v2/timeline/health");
    final ClientResponse response = getResponse(client, healthUri);
    final TimelineHealth reported =
        response.getEntity(new GenericType<TimelineHealth>() {
        });
    assertEquals(200, response.getStatus());
    assertEquals(TimelineHealth.TimelineHealthStatus.RUNNING,
        reported.getHealthStatus());
  } finally {
    // Always release the client, even when an assertion fails.
    client.destroy();
  }
}
}