HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths

This reverts changes in HADOOP-13230 to use S3Guard TTL in choosing when
to issue a HEAD request; fixing tests to compensate.

New org.apache.hadoop.fs.s3a.performance.OperationCost cost,
S3GUARD_NONAUTH_FILE_STATUS_PROBE for use in cost tests.

Contributed by Steve Loughran.

Change-Id: I418d55d2d2562a48b2a14ec7dee369db49b4e29e
This commit is contained in:
Steve Loughran 2020-10-08 15:35:57 +01:00
parent 52db86b0bb
commit f83e07a20f
6 changed files with 53 additions and 58 deletions

View File

@ -2980,55 +2980,31 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
// a file has been found in a non-auth path and the caller has not said
// they only care about directories
LOG.debug("Metadata for {} found in the non-auth metastore.", path);
// If the timestamp of the pm is close to "now", we don't need to
// bother with a check of S3. that means:
// one of : status modtime is close to now,
// or pm.getLastUpdated() == now
final long msModTime = pm.getFileStatus().getModificationTime();
// get the time in which a status modtime is considered valid
// in a non-auth metastore
long validTime =
ttlTimeProvider.getNow() - ttlTimeProvider.getMetadataTtl();
final long msModTime = msStatus.getModificationTime();
S3AFileStatus s3AFileStatus;
try {
s3AFileStatus = s3GetFileStatus(path,
key,
probes,
tombstones,
needEmptyDirectoryFlag);
} catch (FileNotFoundException fne) {
LOG.trace("File Not Found from probes for {}", key, fne);
s3AFileStatus = null;
}
if (s3AFileStatus == null) {
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
+ "it has been deleted.", path);
} else {
final long s3ModTime = s3AFileStatus.getModificationTime();
if (msModTime < validTime) {
LOG.debug("Metastore entry of {} is out of date, probing S3", path);
try {
S3AFileStatus s3AFileStatus = s3GetFileStatus(path,
key,
probes,
tombstones,
needEmptyDirectoryFlag);
// if the new status is more current than that in the metastore,
// it means S3 has changed and the store needs updating
final long s3ModTime = s3AFileStatus.getModificationTime();
if (s3ModTime > msModTime) {
// there's new data in S3
LOG.debug("S3Guard metadata for {} is outdated;"
+ " s3modtime={}; msModTime={} updating metastore",
path, s3ModTime, msModTime);
// add to S3Guard
S3Guard.putAndReturn(metadataStore, s3AFileStatus,
ttlTimeProvider);
} else {
// the modtime of the data is the same as/older than the s3guard
// value either an old object has been found, or the existing one
// was retrieved in both cases -refresh the S3Guard entry so the
// record's TTL is updated.
S3Guard.refreshEntry(metadataStore, pm, s3AFileStatus,
ttlTimeProvider);
}
// return the value
// note that the checks for empty dir status below can be skipped
// because the call to s3GetFileStatus include the checks there
return s3AFileStatus;
} catch (FileNotFoundException fne) {
// the attempt to refresh the record failed because there was
// no entry. Either it is a new file not visible, or it
// has been deleted (and therefore S3Guard is out of sync with S3)
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
+ "it has been deleted.", path);
if(s3ModTime > msModTime) {
LOG.debug("S3Guard metadata for {} is outdated;"
+ " s3modtime={}; msModTime={} updating metastore",
path, s3ModTime, msModTime);
return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
ttlTimeProvider);
}
}
}

View File

@ -314,7 +314,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
//Until this point, no exception is thrown about access
if (!fsKeyB.hasMetadataStore()) {
if (statusProbesCheckS3(fsKeyB, fileToStat)) {
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> fsKeyB.listStatus(fileToStat));
@ -323,6 +323,16 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
}
}
/**
* Do file status probes check S3?
* @param fs filesystem
* @param path file path
* @return true if check for a path being a file will issue a HEAD request.
*/
private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) {
return !fs.hasMetadataStore() || !fs.allowAuthoritative(path);
}
/**
* It is possible to delete directories without the proper encryption key and
* the hierarchy above it.
@ -340,7 +350,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
writeThenReadFile(fileToDelete, TEST_FILE_LEN);
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
if (!fsKeyB.hasMetadataStore()) {
if (statusProbesCheckS3(fsKeyB, fileToDelete)) {
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> fsKeyB.delete(fileToDelete, false));

View File

@ -92,7 +92,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
whenRaw(FILE_STATUS_FILE_PROBE
.plus(LIST_LOCATED_STATUS_LIST_OP)),
whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
whenNonauth(LIST_LOCATED_STATUS_LIST_OP
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
}
@Test
@ -187,7 +188,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
whenRaw(LIST_STATUS_LIST_OP
.plus(GET_FILE_STATUS_ON_FILE)),
whenAuthoritative(LIST_STATUS_LIST_OP),
whenNonauth(LIST_STATUS_LIST_OP));
whenNonauth(LIST_STATUS_LIST_OP
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
}
@Test

View File

@ -326,7 +326,7 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase {
* @return a number >= 0.
*/
private int getFileStatusHeadCount() {
return authMode ? 0 : 0;
return authMode ? 0 : 1;
}
/**

View File

@ -422,7 +422,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
readonlyFS.getFileStatus(emptyDir);
// now look at a file; the outcome depends on the mode.
accessDeniedIf(!s3guard, () ->
accessDeniedIf(!guardedInAuthMode, () ->
readonlyFS.getFileStatus(subdirFile));
// irrespective of mode, the attempt to read the data will fail.
@ -437,7 +437,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
// This means that permissions on the file do not get checked.
// See: HADOOP-16464.
Optional<FSDataInputStream> optIn = accessDeniedIf(
!s3guard, () -> readonlyFS.open(emptyFile));
!guardedInAuthMode, () -> readonlyFS.open(emptyFile));
if (optIn.isPresent()) {
try (FSDataInputStream is = optIn.get()) {
Assertions.assertThat(is.read())
@ -455,8 +455,8 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
describe("Glob Status operations");
// baseline: the real filesystem on a subdir
globFS(getFileSystem(), subdirFile, null, false, 1);
// a file fails if not guarded
globFS(readonlyFS, subdirFile, null, !s3guard, 1);
// a file fails if not in auth mode
globFS(readonlyFS, subdirFile, null, !guardedInAuthMode, 1);
// empty directories don't fail.
FileStatus[] st = globFS(readonlyFS, emptyDir, null, false, 1);
if (s3guard) {
@ -554,7 +554,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
true,
TEXT_FILE,
true);
accessDeniedIf(!s3guard,
accessDeniedIf(!guardedInAuthMode,
() -> fetcher.getFileStatuses())
.ifPresent(stats -> {
Assertions.assertThat(stats)
@ -619,7 +619,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
public void checkDeleteOperations() throws Throwable {
describe("Testing delete operations");
readonlyFS.delete(emptyDir, true);
if (!s3guard) {
if (!authMode) {
// to fail on HEAD
accessDenied(() -> readonlyFS.delete(emptyFile, true));
} else {

View File

@ -154,6 +154,13 @@ public final class OperationCost {
public static final OperationCost CREATE_FILE_NO_OVERWRITE =
FILE_STATUS_ALL_PROBES;
/**
* S3Guard in non-auth mode always attempts a single file
* status call.
*/
public static final OperationCost S3GUARD_NONAUTH_FILE_STATUS_PROBE =
FILE_STATUS_FILE_PROBE;
/** Expected HEAD count. */
private final int head;