YARN-9545. Create healthcheck REST endpoint for ATSv2. Contributed by Zoltan Siegl.
This commit is contained in:
parent
7861a5eb1a
commit
e49162f4b3
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.api.records.timeline;
|
||||
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlElement;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
/**
|
||||
* This class holds health information for ATS.
|
||||
*/
|
||||
@XmlRootElement(name = "health")
|
||||
@XmlAccessorType(XmlAccessType.NONE)
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Unstable
|
||||
public class TimelineHealth {
|
||||
|
||||
/**
|
||||
* Timline health status.
|
||||
*
|
||||
* RUNNING - Service is up and running
|
||||
* READER_CONNECTION_FAULURE - isConnectionAlive() of reader implementation
|
||||
* reported an error
|
||||
*/
|
||||
public enum TimelineHealthStatus {
|
||||
RUNNING,
|
||||
READER_CONNECTION_FAILURE
|
||||
}
|
||||
|
||||
private TimelineHealthStatus healthStatus;
|
||||
private String diagnosticsInfo;
|
||||
|
||||
public TimelineHealth(TimelineHealthStatus healthy, String diagnosticsInfo) {
|
||||
this.healthStatus = healthy;
|
||||
this.diagnosticsInfo = diagnosticsInfo;
|
||||
}
|
||||
|
||||
public TimelineHealth() {
|
||||
|
||||
}
|
||||
|
||||
@XmlElement(name = "healthStatus")
|
||||
public TimelineHealthStatus getHealthStatus() {
|
||||
return healthStatus;
|
||||
}
|
||||
|
||||
@XmlElement(name = "diagnosticsInfo")
|
||||
public String getDiagnosticsInfo() {
|
||||
return diagnosticsInfo;
|
||||
}
|
||||
|
||||
|
||||
public void setHealthStatus(TimelineHealthStatus healthStatus) {
|
||||
this.healthStatus = healthStatus;
|
||||
}
|
||||
|
||||
public void setDiagnosticsInfo(String diagnosticsInfo) {
|
||||
this.diagnosticsInfo = diagnosticsInfo;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.timelineservice.documentstore;
|
|||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||
import org.apache.hadoop.yarn.server.timelineservice.documentstore.lib.DocumentStoreVendor;
|
||||
|
@ -100,6 +101,18 @@ public class DocumentStoreTimelineReaderImpl
|
|||
return collectionReader.fetchEntityTypes(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimelineHealth getHealthStatus() {
|
||||
if (collectionReader != null) {
|
||||
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||
"");
|
||||
} else {
|
||||
return new TimelineHealth(
|
||||
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||
"Timeline store reader not initialized.");
|
||||
}
|
||||
}
|
||||
|
||||
// for honoring all filters from {@link TimelineEntityFilters}
|
||||
private Set<TimelineEntity> applyFilters(TimelineEntityFilters filters,
|
||||
TimelineDataToRetrieve dataToRetrieve,
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
|
@ -158,6 +159,18 @@ public class HBaseTimelineReaderImpl
|
|||
return reader.readEntityTypes(hbaseConf, conn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimelineHealth getHealthStatus() {
|
||||
if (!this.isHBaseDown()) {
|
||||
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||
"");
|
||||
} else {
|
||||
return new TimelineHealth(
|
||||
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||
"HBase connection is down");
|
||||
}
|
||||
}
|
||||
|
||||
protected static final TimelineEntityFilters MONITOR_FILTERS =
|
||||
new TimelineEntityFilters.Builder().entityLimit(1L).build();
|
||||
protected static final TimelineDataToRetrieve DATA_TO_RETRIEVE =
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowRunEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
|
@ -219,4 +220,13 @@ public class TimelineReaderManager extends AbstractService {
|
|||
}
|
||||
return callerUGI != null && adminACLsManager.isAdmin(callerUGI);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if reader connection is alive.
|
||||
*
|
||||
* @return boolean True if reader connection is alive, false otherwise.
|
||||
*/
|
||||
public TimelineHealth getHealthStatus() {
|
||||
return reader.getHealthStatus();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.hadoop.http.JettyUtils;
|
|||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||
|
@ -218,6 +219,38 @@ public class TimelineReaderWebServices {
|
|||
return TimelineUtils.createTimelineAbout("Timeline Reader API");
|
||||
}
|
||||
|
||||
/**
|
||||
* Health check REST end point.
|
||||
*
|
||||
* @param req Servlet request.
|
||||
* @param res Servlet response.
|
||||
*
|
||||
* @return A {@link Response} object with HTTP status 200 OK if the service
|
||||
* is running.
|
||||
* Otherwise, a {@link Response} object with HTTP status 500 is
|
||||
* returned.
|
||||
*/
|
||||
@GET
|
||||
@Path("/health")
|
||||
@Produces(MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8)
|
||||
public Response health(
|
||||
@Context HttpServletRequest req,
|
||||
@Context HttpServletResponse res
|
||||
) {
|
||||
Response response;
|
||||
TimelineHealth timelineHealth = this.getTimelineReaderManager().getHealthStatus();
|
||||
if (timelineHealth.getHealthStatus()
|
||||
.equals(TimelineHealth.TimelineHealthStatus.RUNNING)) {
|
||||
response = Response.ok(timelineHealth).build();
|
||||
} else {
|
||||
LOG.info("Timeline services health check: timeline reader reported " +
|
||||
"connection failure");
|
||||
response = Response.serverError().entity(timelineHealth).build();
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a single entity for a given entity type and UID which is a delimited
|
||||
* string containing clusterid, userid, flow name, flowrun id and app id.
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.hadoop.fs.LocatedFileStatus;
|
|||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
|
||||
|
@ -442,4 +443,18 @@ public class FileSystemTimelineReaderImpl extends AbstractService
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimelineHealth getHealthStatus() {
|
||||
try {
|
||||
fs.exists(rootPath);
|
||||
} catch (IOException e) {
|
||||
return new TimelineHealth(
|
||||
TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
|
||||
e.getMessage()
|
||||
);
|
||||
}
|
||||
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||
"");
|
||||
}
|
||||
}
|
|
@ -19,6 +19,7 @@
|
|||
package org.apache.hadoop.yarn.server.timelineservice.storage;
|
||||
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
|
||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
|
||||
|
@ -71,4 +72,10 @@ public class NoOpTimelineReaderImpl extends AbstractService
|
|||
"requests would be empty");
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimelineHealth getHealthStatus() {
|
||||
return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||
"NoOpTimelineReader is configured. ");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Set;
|
|||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.service.Service;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
|
||||
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
|
||||
|
@ -192,4 +193,11 @@ public interface TimelineReader extends Service {
|
|||
* storage.
|
||||
*/
|
||||
Set<String> getEntityTypes(TimelineReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Check if reader connection is working properly.
|
||||
*
|
||||
* @return True if reader connection works as expected, false otherwise.
|
||||
*/
|
||||
TimelineHealth getHealthStatus();
|
||||
}
|
|
@ -37,6 +37,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.http.JettyUtils;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
|
||||
import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
|
@ -777,4 +778,22 @@ public class TestTimelineReaderWebServices {
|
|||
client.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHealthCheck() throws Exception {
|
||||
Client client = createClient();
|
||||
try {
|
||||
URI uri = URI.create("http://localhost:" + serverPort + "/ws/v2/"
|
||||
+ "timeline/health");
|
||||
ClientResponse resp = getResponse(client, uri);
|
||||
TimelineHealth timelineHealth =
|
||||
resp.getEntity(new GenericType<TimelineHealth>() {
|
||||
});
|
||||
assertEquals(200, resp.getStatus());
|
||||
assertEquals(TimelineHealth.TimelineHealthStatus.RUNNING,
|
||||
timelineHealth.getHealthStatus());
|
||||
} finally {
|
||||
client.destroy();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue