YARN-8859. Add audit logs for router service (#3607)

Co-authored-by: Minni Mittal <mimittal@microsoft.com>
This commit is contained in:
minni31 2022-01-03 10:38:59 +05:30 committed by GitHub
parent 43afd1753a
commit 3bf014d871
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 473 additions and 26 deletions

View File

@ -0,0 +1,243 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Manages Router audit logs.
* Audit log format is written as key=value pairs. Tab separated.
*/
public class RouterAuditLogger {
private static final Logger LOG =
LoggerFactory.getLogger(RouterAuditLogger.class);
private RouterAuditLogger() {
}
enum Keys {USER, OPERATION, TARGET, RESULT, IP, PERMISSIONS, DESCRIPTION, APPID, SUBCLUSTERID}
public static class AuditConstants {
static final String SUCCESS = "SUCCESS";
static final String FAILURE = "FAILURE";
static final String KEY_VAL_SEPARATOR = "=";
static final char PAIR_SEPARATOR = '\t';
public static final String GET_NEW_APP = "Get New App";
public static final String SUBMIT_NEW_APP = "Submit New App";
public static final String FORCE_KILL_APP = "Force Kill App";
public static final String GET_APP_REPORT = "Get Application Report";
}
/**
* Create a readable and parseable audit log string for a successful event.
*
* @param user User who made the service request to the Router
* @param operation Operation requested by the user.
* @param target The target on which the operation is being performed.
* @param appId Application Id in which operation was performed.
*
* <br><br>
* Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
* delimiter and hence the value fields should not contains tabs ('\t').
*/
public static void logSuccess(String user, String operation, String target,
ApplicationId appId) {
if (LOG.isInfoEnabled()) {
LOG.info(createSuccessLog(user, operation, target, appId, null));
}
}
/**
* Create a readable and parseable audit log string for a successful event.
*
* @param user User who made the service request to the Router
* @param operation Operation requested by the user.
* @param target The target on which the operation is being performed.
* @param appId Application Id in which operation was performed.
* @param subClusterId Subcluster Id in which operation is performed.
*
* <br><br>
* Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
* delimiter and hence the value fields should not contains tabs ('\t').
*/
public static void logSuccess(String user, String operation, String target,
ApplicationId appId, SubClusterId subClusterId) {
if (LOG.isInfoEnabled()) {
LOG.info(createSuccessLog(user, operation, target, appId, subClusterId));
}
}
/**
* A helper api for creating an audit log for a successful event.
*/
static String createSuccessLog(String user, String operation, String target,
ApplicationId appId, SubClusterId subClusterID) {
StringBuilder b =
createStringBuilderForSuccessEvent(user, operation, target);
if (appId != null) {
add(Keys.APPID, appId.toString(), b);
}
if (subClusterID != null) {
add(Keys.SUBCLUSTERID, subClusterID.toString(), b);
}
return b.toString();
}
/**
* A helper function for creating the common portion of a successful
* log message.
*/
private static StringBuilder createStringBuilderForSuccessEvent(String user,
String operation, String target) {
StringBuilder b = new StringBuilder();
start(Keys.USER, user, b);
add(Keys.OPERATION, operation, b);
add(Keys.TARGET, target, b);
add(Keys.RESULT, AuditConstants.SUCCESS, b);
return b;
}
/**
* Create a readable and parseable audit log string for a failed event.
*
* @param user User who made the service request.
* @param operation Operation requested by the user.
* @param perm Target permissions.
* @param target The target on which the operation is being performed.
* @param description Some additional information as to why the operation
* failed.
*
* <br><br>
* Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
* delimiter and hence the value fields should not contains tabs ('\t').
*/
public static void logFailure(String user, String operation, String perm,
String target, String description) {
if (LOG.isInfoEnabled()) {
LOG.info(
createFailureLog(user, operation, perm, target, description, null,
null));
}
}
/**
* Create a readable and parseable audit log string for a failed event.
*
* @param user User who made the service request.
* @param operation Operation requested by the user.
* @param perm Target permissions.
* @param target The target on which the operation is being performed.
* @param description Some additional information as to why the operation
* failed.
* @param appId Application Id in which operation was performed.
*
* <br><br>
* Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
* delimiter and hence the value fields should not contains tabs ('\t').
*/
public static void logFailure(String user, String operation, String perm,
String target, String description, ApplicationId appId) {
if (LOG.isInfoEnabled()) {
LOG.info(
createFailureLog(user, operation, perm, target, description, appId,
null));
}
}
/**
* Create a readable and parseable audit log string for a failed event.
*
* @param user User who made the service request.
* @param operation Operation requested by the user.
* @param perm Target permissions.
* @param target The target on which the operation is being performed.
* @param description Some additional information as to why the operation
* failed.
* @param appId Application Id in which operation was performed.
* @param subClusterId SubCluster Id in which operation was performed.
*
* <br><br>
* Note that the {@link RouterAuditLogger} uses tabs ('\t') as a key-val
* delimiter and hence the value fields should not contains tabs ('\t').
*/
public static void logFailure(String user, String operation, String perm,
String target, String description, ApplicationId appId,
SubClusterId subClusterId) {
if (LOG.isInfoEnabled()) {
LOG.info(
createFailureLog(user, operation, perm, target, description, appId,
subClusterId));
}
}
/**
* A helper api for creating an audit log for a failure event.
*/
static String createFailureLog(String user, String operation, String perm,
String target, String description, ApplicationId appId,
SubClusterId subClusterId) {
StringBuilder b =
createStringBuilderForFailureLog(user, operation, target, description,
perm);
if (appId != null) {
add(Keys.APPID, appId.toString(), b);
}
if (subClusterId != null) {
add(Keys.SUBCLUSTERID, subClusterId.toString(), b);
}
return b.toString();
}
/**
* A helper function for creating the common portion of a failure
* log message.
*/
private static StringBuilder createStringBuilderForFailureLog(String user,
String operation, String target, String description, String perm) {
StringBuilder b = new StringBuilder();
start(Keys.USER, user, b);
add(Keys.OPERATION, operation, b);
add(Keys.TARGET, target, b);
add(Keys.RESULT, AuditConstants.FAILURE, b);
add(Keys.DESCRIPTION, description, b);
add(Keys.PERMISSIONS, perm, b);
return b;
}
/**
* Adds the first key-val pair to the passed builder in the following format
* key=value.
*/
static void start(Keys key, String value, StringBuilder b) {
b.append(key.name()).append(AuditConstants.KEY_VAL_SEPARATOR).append(value);
}
/**
* Appends the key-val pair to the passed builder in the following format
* <pair-delim>key=value.
*/
static void add(Keys key, String value, StringBuilder b) {
b.append(AuditConstants.PAIR_SEPARATOR).append(key.name())
.append(AuditConstants.KEY_VAL_SEPARATOR).append(value);
}
}

View File

@ -124,6 +124,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSub
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
import org.apache.hadoop.yarn.server.router.RouterAuditLogger;
import org.apache.hadoop.yarn.server.router.RouterMetrics;
import org.apache.hadoop.yarn.server.router.RouterServerUtil;
import org.apache.hadoop.yarn.util.Clock;
@ -299,6 +300,9 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededAppsCreated(stopTime - startTime);
RouterAuditLogger.logSuccess(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_NEW_APP,
"RouterClientRMService", response.getApplicationId());
return response;
} else {
// Empty response from the ResourceManager.
@ -310,7 +314,9 @@ public class FederationClientInterceptor
routerMetrics.incrAppsFailedCreated();
String errMsg = "Fail to create a new application.";
LOG.error(errMsg);
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_NEW_APP, "UNKNOWN",
"RouterClientRMService", errMsg);
throw new YarnException(errMsg);
}
@ -389,9 +395,13 @@ public class FederationClientInterceptor
|| request.getApplicationSubmissionContext()
.getApplicationId() == null) {
routerMetrics.incrAppsFailedSubmitted();
RouterServerUtil
.logAndThrowException("Missing submitApplication request or "
+ "applicationSubmissionContex information.", null);
String errMsg =
"Missing submitApplication request or applicationSubmissionContext "
+ "information.";
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP, "UNKNOWN",
"RouterClientRMService", errMsg);
throw new YarnException(errMsg);
}
ApplicationId applicationId =
@ -419,7 +429,10 @@ public class FederationClientInterceptor
routerMetrics.incrAppsFailedSubmitted();
String message = "Unable to insert the ApplicationId " + applicationId
+ " into the FederationStateStore";
RouterServerUtil.logAndThrowException(message, e);
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP, "UNKNOWN",
"RouterClientRMService", message, applicationId, subClusterId);
throw new YarnException(message, e);
}
} else {
try {
@ -436,7 +449,10 @@ public class FederationClientInterceptor
+ " already submitted on SubCluster " + subClusterId);
} else {
routerMetrics.incrAppsFailedSubmitted();
RouterServerUtil.logAndThrowException(message, e);
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP, "UNKNOWN",
"RouterClientRMService", message, applicationId, subClusterId);
throw new YarnException(message, e);
}
}
}
@ -458,6 +474,9 @@ public class FederationClientInterceptor
+ " with appId " + applicationId + " submitted on " + subClusterId);
long stopTime = clock.getTime();
routerMetrics.succeededAppsSubmitted(stopTime - startTime);
RouterAuditLogger.logSuccess(user.getShortUserName(),
RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP,
"RouterClientRMService", applicationId, subClusterId);
return response;
} else {
// Empty response from the ResourceManager.
@ -470,7 +489,9 @@ public class FederationClientInterceptor
String errMsg = "Application "
+ request.getApplicationSubmissionContext().getApplicationName()
+ " with appId " + applicationId + " failed to be submitted.";
LOG.error(errMsg);
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.SUBMIT_NEW_APP, "UNKNOWN",
"RouterClientRMService", errMsg, applicationId);
throw new YarnException(errMsg);
}
@ -498,8 +519,11 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationId() == null) {
routerMetrics.incrAppsFailedKilled();
RouterServerUtil.logAndThrowException(
"Missing forceKillApplication request or ApplicationId.", null);
String message = "Missing forceKillApplication request or ApplicationId.";
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.FORCE_KILL_APP, "UNKNOWN",
"RouterClientRMService", message);
throw new YarnException(message);
}
ApplicationId applicationId = request.getApplicationId();
SubClusterId subClusterId = null;
@ -509,8 +533,12 @@ public class FederationClientInterceptor
.getApplicationHomeSubCluster(request.getApplicationId());
} catch (YarnException e) {
routerMetrics.incrAppsFailedKilled();
RouterServerUtil.logAndThrowException("Application " + applicationId
+ " does not exist in FederationStateStore", e);
String msg = "Application " + applicationId
+ " does not exist in FederationStateStore";
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.FORCE_KILL_APP, "UNKNOWN",
"RouterClientRMService", msg, applicationId);
throw new YarnException(msg, e);
}
ApplicationClientProtocol clientRMProxy =
@ -523,9 +551,10 @@ public class FederationClientInterceptor
response = clientRMProxy.forceKillApplication(request);
} catch (Exception e) {
routerMetrics.incrAppsFailedKilled();
LOG.error("Unable to kill the application report for "
+ request.getApplicationId() + "to SubCluster "
+ subClusterId.getId(), e);
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.FORCE_KILL_APP, "UNKNOWN",
"RouterClientRMService", "Unable to kill the application report",
applicationId, subClusterId);
throw e;
}
@ -536,6 +565,9 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededAppsKilled(stopTime - startTime);
RouterAuditLogger.logSuccess(user.getShortUserName(),
RouterAuditLogger.AuditConstants.FORCE_KILL_APP,
"RouterClientRMService", applicationId);
return response;
}
@ -563,9 +595,12 @@ public class FederationClientInterceptor
if (request == null || request.getApplicationId() == null) {
routerMetrics.incrAppsFailedRetrieved();
RouterServerUtil.logAndThrowException(
"Missing getApplicationReport request or applicationId information.",
null);
String errMsg =
"Missing getApplicationReport request or applicationId information.";
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_APP_REPORT, "UNKNOWN",
"RouterClientRMService", errMsg);
throw new YarnException(errMsg);
}
SubClusterId subClusterId = null;
@ -575,9 +610,12 @@ public class FederationClientInterceptor
.getApplicationHomeSubCluster(request.getApplicationId());
} catch (YarnException e) {
routerMetrics.incrAppsFailedRetrieved();
RouterServerUtil
.logAndThrowException("Application " + request.getApplicationId()
+ " does not exist in FederationStateStore", e);
String errMsg = "Application " + request.getApplicationId()
+ " does not exist in FederationStateStore";
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_APP_REPORT, "UNKNOWN",
"RouterClientRMService", errMsg, request.getApplicationId());
throw new YarnException(errMsg, e);
}
ApplicationClientProtocol clientRMProxy =
@ -588,9 +626,12 @@ public class FederationClientInterceptor
response = clientRMProxy.getApplicationReport(request);
} catch (Exception e) {
routerMetrics.incrAppsFailedRetrieved();
LOG.error("Unable to get the application report for "
+ request.getApplicationId() + "to SubCluster "
+ subClusterId.getId(), e);
String errMsg = "Unable to get the application report for " + request
.getApplicationId() + "to SubCluster " + subClusterId.getId();
RouterAuditLogger.logFailure(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_APP_REPORT, "UNKNOWN",
"RouterClientRMService", errMsg, request.getApplicationId(),
subClusterId);
throw e;
}
@ -602,6 +643,9 @@ public class FederationClientInterceptor
long stopTime = clock.getTime();
routerMetrics.succeededAppsRetrieved(stopTime - startTime);
RouterAuditLogger.logSuccess(user.getShortUserName(),
RouterAuditLogger.AuditConstants.GET_APP_REPORT,
"RouterClientRMService", request.getApplicationId());
return response;
}

View File

@ -0,0 +1,160 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.junit.Before;
import org.junit.Test;
/**
* Tests {@link RouterAuditLogger}.
*/
public class TestRouterAuditLogger {
private static final String USER = "test";
private static final String OPERATION = "oper";
private static final String TARGET = "tgt";
private static final String DESC = "description of an audit log";
private static final ApplicationId APPID = mock(ApplicationId.class);
private static final SubClusterId SUBCLUSTERID = mock(SubClusterId.class);
@Before public void setUp() throws Exception {
when(APPID.toString()).thenReturn("app_1");
when(SUBCLUSTERID.toString()).thenReturn("sc0");
}
/**
* Test the AuditLog format with key-val pair.
*/
@Test
public void testKeyValLogFormat() throws Exception {
StringBuilder actLog = new StringBuilder();
StringBuilder expLog = new StringBuilder();
// add the first k=v pair and check
RouterAuditLogger.start(RouterAuditLogger.Keys.USER, USER, actLog);
expLog.append("USER=test");
assertEquals(expLog.toString(), actLog.toString());
// append another k1=v1 pair to already added k=v and test
RouterAuditLogger.add(RouterAuditLogger.Keys.OPERATION, OPERATION, actLog);
expLog.append("\tOPERATION=oper");
assertEquals(expLog.toString(), actLog.toString());
// append another k1=null pair and test
RouterAuditLogger.add(RouterAuditLogger.Keys.APPID, (String) null, actLog);
expLog.append("\tAPPID=null");
assertEquals(expLog.toString(), actLog.toString());
// now add the target and check of the final string
RouterAuditLogger.add(RouterAuditLogger.Keys.TARGET, TARGET, actLog);
expLog.append("\tTARGET=tgt");
assertEquals(expLog.toString(), actLog.toString());
}
/**
* Test the AuditLog format for successful events.
*/
private void testSuccessLogFormatHelper(ApplicationId appId,
SubClusterId subClusterId) {
// check without the IP
String sLog = RouterAuditLogger
.createSuccessLog(USER, OPERATION, TARGET, appId, subClusterId);
StringBuilder expLog = new StringBuilder();
expLog.append("USER=test\t");
expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=SUCCESS");
if (appId != null) {
expLog.append("\tAPPID=app_1");
}
if (subClusterId != null) {
expLog.append("\tSUBCLUSTERID=sc0");
}
assertEquals(expLog.toString(), sLog);
}
/**
* Test the AuditLog format for successful events passing nulls.
*/
private void testSuccessLogNulls() {
String sLog =
RouterAuditLogger.createSuccessLog(null, null, null, null, null);
StringBuilder expLog = new StringBuilder();
expLog.append("USER=null\t");
expLog.append("OPERATION=null\tTARGET=null\tRESULT=SUCCESS");
assertEquals(expLog.toString(), sLog);
}
/**
* Test the AuditLog format for successful events with the various
* parameters.
*/
private void testSuccessLogFormat() {
testSuccessLogFormatHelper(null, null);
testSuccessLogFormatHelper(APPID, null);
testSuccessLogFormatHelper(null, SUBCLUSTERID);
testSuccessLogFormatHelper(APPID, SUBCLUSTERID);
}
/**
* Test the AuditLog format for failure events.
*/
private void testFailureLogFormatHelper(ApplicationId appId,
SubClusterId subClusterId) {
String fLog = RouterAuditLogger
.createFailureLog(USER, OPERATION, "UNKNOWN", TARGET, DESC, appId,
subClusterId);
StringBuilder expLog = new StringBuilder();
expLog.append("USER=test\t");
expLog.append("OPERATION=oper\tTARGET=tgt\tRESULT=FAILURE\t");
expLog.append("DESCRIPTION=description of an audit log");
expLog.append("\tPERMISSIONS=UNKNOWN");
if (appId != null) {
expLog.append("\tAPPID=app_1");
}
if (subClusterId != null) {
expLog.append("\tSUBCLUSTERID=sc0");
}
assertEquals(expLog.toString(), fLog);
}
/**
* Test the AuditLog format for failure events with the various
* parameters.
*/
private void testFailureLogFormat() {
testFailureLogFormatHelper(null, null);
testFailureLogFormatHelper(APPID, null);
testFailureLogFormatHelper(null, SUBCLUSTERID);
testFailureLogFormatHelper(APPID, SUBCLUSTERID);
}
/**
* Test {@link RouterAuditLogger}.
*/
@Test public void testRouterAuditLogger() throws Exception {
testSuccessLogFormat();
testFailureLogFormat();
}
}

View File

@ -242,7 +242,7 @@ public class TestFederationClientInterceptor extends BaseRouterClientRMTest {
} catch (YarnException e) {
Assert.assertTrue(
e.getMessage().startsWith("Missing submitApplication request or "
+ "applicationSubmissionContex information."));
+ "applicationSubmissionContext information."));
}
try {
interceptor.submitApplication(SubmitApplicationRequest.newInstance(null));
@ -250,7 +250,7 @@ public class TestFederationClientInterceptor extends BaseRouterClientRMTest {
} catch (YarnException e) {
Assert.assertTrue(
e.getMessage().startsWith("Missing submitApplication request or "
+ "applicationSubmissionContex information."));
+ "applicationSubmissionContext information."));
}
try {
ApplicationSubmissionContext context = ApplicationSubmissionContext
@ -262,7 +262,7 @@ public class TestFederationClientInterceptor extends BaseRouterClientRMTest {
} catch (YarnException e) {
Assert.assertTrue(
e.getMessage().startsWith("Missing submitApplication request or "
+ "applicationSubmissionContex information."));
+ "applicationSubmissionContext information."));
}
}