HADOOP-18688. S3A audit header to include count of items in delete ops (#5621)

The auditor-generated HTTP referrer URL now includes the count of keys
to delete in the "ks" query parameter.

Contributed by Viraj Jasani
This commit is contained in:
Viraj Jasani 2023-05-16 02:40:16 -07:00 committed by Steve Loughran
parent 5e8663d0f5
commit 949d5ca20b
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
5 changed files with 114 additions and 20 deletions

View File

@ -115,4 +115,9 @@ public final class AuditConstants {
*/ */
public static final String PARAM_TIMESTAMP = "ts"; public static final String PARAM_TIMESTAMP = "ts";
/**
* Number of keys (files) to be deleted by a single-object or bulk delete request.
*/
public static final String DELETE_KEYS_SIZE = "ks";
} }

View File

@ -25,6 +25,8 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import com.amazonaws.AmazonWebServiceRequest; import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -41,6 +43,7 @@ import org.apache.hadoop.fs.store.LogExactlyOnce;
import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import static org.apache.hadoop.fs.audit.AuditConstants.DELETE_KEYS_SIZE;
import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_FILESYSTEM_ID; import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_FILESYSTEM_ID;
import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PRINCIPAL; import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_PRINCIPAL;
import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD0; import static org.apache.hadoop.fs.audit.AuditConstants.PARAM_THREAD0;
@ -359,6 +362,8 @@ public class LoggingAuditor
final T request) { final T request) {
// attach range for GetObject requests // attach range for GetObject requests
attachRangeFromRequest(request); attachRangeFromRequest(request);
// for delete op, attach the number of files to delete
attachDeleteKeySizeAttribute(request);
// build the referrer header // build the referrer header
final String header = referrer.buildHttpReferrer(); final String header = referrer.buildHttpReferrer();
// update the outer class's field. // update the outer class's field.
@ -385,6 +390,24 @@ public class LoggingAuditor
return request; return request;
} }
/**
 * Record the number of keys a delete request covers under the
 * {@code DELETE_KEYS_SIZE} attribute.
 * A bulk delete contributes the size of its key list; a single-object
 * delete contributes "1", provided its key is neither null nor empty.
 * Requests of any other type are left untouched.
 *
 * @param request the request object.
 * @param <T> type of the request.
 */
private <T extends AmazonWebServiceRequest> void attachDeleteKeySizeAttribute(T request) {
  if (request instanceof DeleteObjectRequest) {
    // single-object delete: only counted when a key is actually set
    final String objectKey = ((DeleteObjectRequest) request).getKey();
    if (objectKey == null || objectKey.isEmpty()) {
      return;
    }
    this.set(DELETE_KEYS_SIZE, "1");
    return;
  }
  if (request instanceof DeleteObjectsRequest) {
    // bulk delete: the count is the number of entries in the key list
    final DeleteObjectsRequest bulkDelete = (DeleteObjectsRequest) request;
    this.set(DELETE_KEYS_SIZE, String.valueOf(bulkDelete.getKeys().size()));
  }
}
@Override @Override
public String toString() { public String toString() {
final StringBuilder sb = new StringBuilder( final StringBuilder sb = new StringBuilder(

View File

@ -211,6 +211,7 @@ https://audit.example.org/hadoop/1/op_rename/3c0d9b7e-2a63-43d9-a220-3c574d768ef
&pr=alice &pr=alice
&p2=s3a://alice-london/path2 &p2=s3a://alice-london/path2
&ps=235865a0-d399-4696-9978-64568db1b51c &ps=235865a0-d399-4696-9978-64568db1b51c
&ks=5
&id=3c0d9b7e-2a63-43d9-a220-3c574d768ef3-3 &id=3c0d9b7e-2a63-43d9-a220-3c574d768ef3-3
&t0=12 &t0=12
&fs=af5943a9-b6f6-4eec-9c58-008982fc492a &fs=af5943a9-b6f6-4eec-9c58-008982fc492a
@ -237,6 +238,7 @@ If any of the field values were `null`, the field is omitted.
| `t0` | Thread 0: thread span was created in | `100` | | `t0` | Thread 0: thread span was created in | `100` |
| `t1` | Thread 1: thread this operation was executed in | `200` | | `t1` | Thread 1: thread this operation was executed in | `200` |
| `ts` | Timestamp (UTC epoch millis) | `1617116985923` | | `ts` | Timestamp (UTC epoch millis) | `1617116985923` |
| `ks` | Number of keys (files) to delete in the given request (applicable to delete and rename ops) | `5` |
_Notes_ _Notes_

View File

@ -19,9 +19,13 @@
package org.apache.hadoop.fs.s3a.audit; package org.apache.hadoop.fs.s3a.audit;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.stream.Collectors;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest;
import org.junit.After; import org.junit.After;
@ -234,4 +238,21 @@ public abstract class AbstractAuditingTest extends AbstractHadoopTestBase {
.isNull(); .isNull();
} }
/**
 * Build a bulk delete request for the given keys and pass it through
 * {@code beforeExecution} of the manager.
 *
 * @param keys keys to be provided in the bulk delete request.
 * @return the processed request, or null if {@code keys} is null or empty.
 */
protected DeleteObjectsRequest headForBulkDelete(String... keys) {
  final boolean hasKeys = keys != null && keys.length > 0;
  if (!hasKeys) {
    // nothing to delete: no request is built
    return null;
  }
  final List<DeleteObjectsRequest.KeyVersion> keyVersions = Arrays.asList(keys)
      .stream()
      .map(key -> new DeleteObjectsRequest.KeyVersion(key))
      .collect(Collectors.toList());
  final DeleteObjectsRequest bulkDelete = requestFactory.newBulkDeleteRequest(keyVersions);
  return manager.beforeExecution(bulkDelete);
}
} }

View File

@ -18,10 +18,12 @@
package org.apache.hadoop.fs.s3a.audit; package org.apache.hadoop.fs.s3a.audit;
import java.io.IOException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.Map; import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest;
import org.junit.Before; import org.junit.Before;
@ -36,6 +38,7 @@ import org.apache.hadoop.fs.audit.CommonAuditContext;
import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import static org.apache.hadoop.fs.audit.AuditConstants.DELETE_KEYS_SIZE;
import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER;
import static org.apache.hadoop.fs.s3a.audit.S3LogParser.*; import static org.apache.hadoop.fs.s3a.audit.S3LogParser.*;
@ -104,16 +107,8 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest {
LOG.info("Header is {}", header); LOG.info("Header is {}", header);
Map<String, String> params Map<String, String> params
= HttpReferrerAuditHeader.extractQueryParameters(header); = HttpReferrerAuditHeader.extractQueryParameters(header);
assertMapContains(params, PARAM_PRINCIPAL, final String threadId = CommonAuditContext.currentThreadID();
UserGroupInformation.getCurrentUser().getUserName()); compareCommonHeaders(params, PATH_1, PATH_2, threadId, span);
assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId());
assertMapContains(params, PARAM_OP, OPERATION);
assertMapContains(params, PARAM_PATH, PATH_1);
assertMapContains(params, PARAM_PATH2, PATH_2);
String threadID = CommonAuditContext.currentThreadID();
assertMapContains(params, PARAM_THREAD0, threadID);
assertMapContains(params, PARAM_THREAD1, threadID);
assertMapContains(params, PARAM_ID, span.getSpanId());
assertThat(span.getTimestamp()) assertThat(span.getTimestamp())
.describedAs("Timestamp of " + span) .describedAs("Timestamp of " + span)
.isEqualTo(ts); .isEqualTo(ts);
@ -135,16 +130,8 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest {
AuditSpan span = getManager().createSpan(OPERATION, p1, p2); AuditSpan span = getManager().createSpan(OPERATION, p1, p2);
long ts = span.getTimestamp(); long ts = span.getTimestamp();
Map<String, String> params = issueRequestAndExtractParameters(); Map<String, String> params = issueRequestAndExtractParameters();
assertMapContains(params, PARAM_PRINCIPAL, final String threadId = CommonAuditContext.currentThreadID();
UserGroupInformation.getCurrentUser().getUserName()); compareCommonHeaders(params, p1, p2, threadId, span);
assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId());
assertMapContains(params, PARAM_OP, OPERATION);
assertMapContains(params, PARAM_PATH, p1);
assertMapContains(params, PARAM_PATH2, p2);
String threadID = CommonAuditContext.currentThreadID();
assertMapContains(params, PARAM_THREAD0, threadID);
assertMapContains(params, PARAM_THREAD1, threadID);
assertMapContains(params, PARAM_ID, span.getSpanId());
assertThat(span.getTimestamp()) assertThat(span.getTimestamp())
.describedAs("Timestamp of " + span) .describedAs("Timestamp of " + span)
.isEqualTo(ts); .isEqualTo(ts);
@ -350,6 +337,62 @@ public class TestHttpReferrerAuditHeader extends AbstractAuditingTest {
assertMapNotContains(params, PARAM_RANGE); assertMapNotContains(params, PARAM_RANGE);
} }
/**
 * Issue a bulk delete of three keys and verify that the referrer header
 * attached to the request carries the common audit parameters, the
 * span timestamp and a key count of 3, and no range parameter.
 */
@Test
public void testHttpReferrerForBulkDelete() throws Throwable {
  final AuditSpan span = span();
  final long ts = span.getTimestamp();
  final DeleteObjectsRequest request = headForBulkDelete("key_01", "key_02", "key_03");

  // the processed request must carry a referrer header
  final Map<String, String> customHeaders = request.getCustomRequestHeaders();
  assertThat(customHeaders)
      .describedAs("Custom headers")
      .containsKey(HEADER_REFERRER);
  final String referrer = customHeaders.get(HEADER_REFERRER);
  LOG.info("Header is {}", referrer);

  final Map<String, String> params =
      HttpReferrerAuditHeader.extractQueryParameters(referrer);
  compareCommonHeaders(params, PATH_1, PATH_2,
      CommonAuditContext.currentThreadID(), span);
  // three keys were supplied, so the key count must be "3"
  assertMapContains(params, DELETE_KEYS_SIZE, "3");
  // the span timestamp must not have changed since it was first read
  assertThat(span.getTimestamp())
      .describedAs("Timestamp of " + span)
      .isEqualTo(ts);
  assertMapNotContains(params, PARAM_RANGE);
  assertMapContains(params, PARAM_TIMESTAMP, Long.toString(ts));
}
/**
 * Assert that the parameters extracted from a referrer header hold the
 * values common to every audited request: principal, filesystem ID,
 * operation, both paths, both thread IDs and the span ID.
 *
 * @param params map of params extracted from the header.
 * @param path1 expected value of the first path.
 * @param path2 expected value of the second path.
 * @param threadID thread id expected for both thread parameters.
 * @param span audit span whose ID is expected in the header.
 * @throws IOException if login fails and/or current user cannot be retrieved.
 */
private void compareCommonHeaders(final Map<String, String> params,
    final String path1,
    final String path2,
    final String threadID,
    final AuditSpan span) throws IOException {
  // resolve the current user up front; this is the only call that can raise IOException
  final String currentUser = UserGroupInformation.getCurrentUser().getUserName();
  assertMapContains(params, PARAM_PRINCIPAL, currentUser);
  assertMapContains(params, PARAM_FILESYSTEM_ID, auditor.getAuditorId());
  assertMapContains(params, PARAM_OP, OPERATION);

  // source and destination paths
  assertMapContains(params, PARAM_PATH, path1);
  assertMapContains(params, PARAM_PATH2, path2);

  // both thread parameters are checked against the same thread ID
  assertMapContains(params, PARAM_THREAD0, threadID);
  assertMapContains(params, PARAM_THREAD1, threadID);

  assertMapContains(params, PARAM_ID, span.getSpanId());
}
/** /**
* Expect a field with quote stripping to match the expected value. * Expect a field with quote stripping to match the expected value.
* @param str string to strip * @param str string to strip