HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths
This reverts the changes made in HADOOP-13230, which used the S3Guard TTL to decide when to issue a HEAD request, and fixes the tests to compensate. Adds a new org.apache.hadoop.fs.s3a.performance.OperationCost constant, S3GUARD_NONAUTH_FILE_STATUS_PROBE, for use in cost tests. Contributed by Steve Loughran. Change-Id: I418d55d2d2562a48b2a14ec7dee369db49b4e29e
This commit is contained in:
parent
44ff4c1058
commit
963793dd48
|
@ -2980,55 +2980,31 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
|||
// a file has been found in a non-auth path and the caller has not said
|
||||
// they only care about directories
|
||||
LOG.debug("Metadata for {} found in the non-auth metastore.", path);
|
||||
// If the timestamp of the pm is close to "now", we don't need to
|
||||
// bother with a check of S3. That means:
|
||||
// one of : status modtime is close to now,
|
||||
// or pm.getLastUpdated() == now
|
||||
final long msModTime = pm.getFileStatus().getModificationTime();
|
||||
|
||||
// get the time in which a status modtime is considered valid
|
||||
// in a non-auth metastore
|
||||
long validTime =
|
||||
ttlTimeProvider.getNow() - ttlTimeProvider.getMetadataTtl();
|
||||
final long msModTime = msStatus.getModificationTime();
|
||||
S3AFileStatus s3AFileStatus;
|
||||
try {
|
||||
s3AFileStatus = s3GetFileStatus(path,
|
||||
key,
|
||||
probes,
|
||||
tombstones,
|
||||
needEmptyDirectoryFlag);
|
||||
} catch (FileNotFoundException fne) {
|
||||
LOG.trace("File Not Found from probes for {}", key, fne);
|
||||
s3AFileStatus = null;
|
||||
}
|
||||
if (s3AFileStatus == null) {
|
||||
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
|
||||
+ "it has been deleted.", path);
|
||||
} else {
|
||||
final long s3ModTime = s3AFileStatus.getModificationTime();
|
||||
|
||||
if (msModTime < validTime) {
|
||||
LOG.debug("Metastore entry of {} is out of date, probing S3", path);
|
||||
try {
|
||||
S3AFileStatus s3AFileStatus = s3GetFileStatus(path,
|
||||
key,
|
||||
probes,
|
||||
tombstones,
|
||||
needEmptyDirectoryFlag);
|
||||
// if the new status is more current than that in the metastore,
|
||||
// it means S3 has changed and the store needs updating
|
||||
final long s3ModTime = s3AFileStatus.getModificationTime();
|
||||
|
||||
if (s3ModTime > msModTime) {
|
||||
// there's new data in S3
|
||||
LOG.debug("S3Guard metadata for {} is outdated;"
|
||||
+ " s3modtime={}; msModTime={} updating metastore",
|
||||
path, s3ModTime, msModTime);
|
||||
// add to S3Guard
|
||||
S3Guard.putAndReturn(metadataStore, s3AFileStatus,
|
||||
ttlTimeProvider);
|
||||
} else {
|
||||
// the modtime of the data is the same as/older than the s3guard
|
||||
// value: either an old object has been found, or the existing one
|
||||
// was retrieved. In both cases, refresh the S3Guard entry so the
|
||||
// record's TTL is updated.
|
||||
S3Guard.refreshEntry(metadataStore, pm, s3AFileStatus,
|
||||
ttlTimeProvider);
|
||||
}
|
||||
// return the value
|
||||
// note that the checks for empty dir status below can be skipped
|
||||
// because the call to s3GetFileStatus includes the checks there
|
||||
return s3AFileStatus;
|
||||
} catch (FileNotFoundException fne) {
|
||||
// the attempt to refresh the record failed because there was
|
||||
// no entry. Either it is a new file not visible, or it
|
||||
// has been deleted (and therefore S3Guard is out of sync with S3)
|
||||
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
|
||||
+ "it has been deleted.", path);
|
||||
if(s3ModTime > msModTime) {
|
||||
LOG.debug("S3Guard metadata for {} is outdated;"
|
||||
+ " s3modtime={}; msModTime={} updating metastore",
|
||||
path, s3ModTime, msModTime);
|
||||
return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
|
||||
ttlTimeProvider);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -314,7 +314,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
|
|||
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
||||
|
||||
//Until this point, no exception is thrown about access
|
||||
if (!fsKeyB.hasMetadataStore()) {
|
||||
if (statusProbesCheckS3(fsKeyB, fileToStat)) {
|
||||
intercept(AccessDeniedException.class,
|
||||
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
||||
() -> fsKeyB.listStatus(fileToStat));
|
||||
|
@ -323,6 +323,16 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Do file status probes check S3?
|
||||
* @param fs filesystem
|
||||
* @param path file path
|
||||
* @return true if a check for the path being a file will issue a HEAD request.
|
||||
*/
|
||||
private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) {
|
||||
return !fs.hasMetadataStore() || !fs.allowAuthoritative(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* It is possible to delete directories without the proper encryption key and
|
||||
* the hierarchy above it.
|
||||
|
@ -340,7 +350,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
|
|||
Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
|
||||
writeThenReadFile(fileToDelete, TEST_FILE_LEN);
|
||||
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
||||
if (!fsKeyB.hasMetadataStore()) {
|
||||
if (statusProbesCheckS3(fsKeyB, fileToDelete)) {
|
||||
intercept(AccessDeniedException.class,
|
||||
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
||||
() -> fsKeyB.delete(fileToDelete, false));
|
||||
|
|
|
@ -92,7 +92,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
|
|||
whenRaw(FILE_STATUS_FILE_PROBE
|
||||
.plus(LIST_LOCATED_STATUS_LIST_OP)),
|
||||
whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
|
||||
whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
|
||||
whenNonauth(LIST_LOCATED_STATUS_LIST_OP
|
||||
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -187,7 +188,8 @@ public class ITestS3AFileOperationCost extends AbstractS3ACostTest {
|
|||
whenRaw(LIST_STATUS_LIST_OP
|
||||
.plus(GET_FILE_STATUS_ON_FILE)),
|
||||
whenAuthoritative(LIST_STATUS_LIST_OP),
|
||||
whenNonauth(LIST_STATUS_LIST_OP));
|
||||
whenNonauth(LIST_STATUS_LIST_OP
|
||||
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -326,7 +326,7 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase {
|
|||
* @return a number >= 0.
|
||||
*/
|
||||
private int getFileStatusHeadCount() {
|
||||
return authMode ? 0 : 0;
|
||||
return authMode ? 0 : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -422,7 +422,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
|
|||
|
||||
readonlyFS.getFileStatus(emptyDir);
|
||||
// now look at a file; the outcome depends on the mode.
|
||||
accessDeniedIf(!s3guard, () ->
|
||||
accessDeniedIf(!guardedInAuthMode, () ->
|
||||
readonlyFS.getFileStatus(subdirFile));
|
||||
|
||||
// irrespective of mode, the attempt to read the data will fail.
|
||||
|
@ -437,7 +437,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
|
|||
// This means that permissions on the file do not get checked.
|
||||
// See: HADOOP-16464.
|
||||
Optional<FSDataInputStream> optIn = accessDeniedIf(
|
||||
!s3guard, () -> readonlyFS.open(emptyFile));
|
||||
!guardedInAuthMode, () -> readonlyFS.open(emptyFile));
|
||||
if (optIn.isPresent()) {
|
||||
try (FSDataInputStream is = optIn.get()) {
|
||||
Assertions.assertThat(is.read())
|
||||
|
@ -455,8 +455,8 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
|
|||
describe("Glob Status operations");
|
||||
// baseline: the real filesystem on a subdir
|
||||
globFS(getFileSystem(), subdirFile, null, false, 1);
|
||||
// a file fails if not guarded
|
||||
globFS(readonlyFS, subdirFile, null, !s3guard, 1);
|
||||
// a file fails if not in auth mode
|
||||
globFS(readonlyFS, subdirFile, null, !guardedInAuthMode, 1);
|
||||
// empty directories don't fail.
|
||||
FileStatus[] st = globFS(readonlyFS, emptyDir, null, false, 1);
|
||||
if (s3guard) {
|
||||
|
@ -554,7 +554,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
|
|||
true,
|
||||
TEXT_FILE,
|
||||
true);
|
||||
accessDeniedIf(!s3guard,
|
||||
accessDeniedIf(!guardedInAuthMode,
|
||||
() -> fetcher.getFileStatuses())
|
||||
.ifPresent(stats -> {
|
||||
Assertions.assertThat(stats)
|
||||
|
@ -619,7 +619,7 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase {
|
|||
public void checkDeleteOperations() throws Throwable {
|
||||
describe("Testing delete operations");
|
||||
readonlyFS.delete(emptyDir, true);
|
||||
if (!s3guard) {
|
||||
if (!authMode) {
|
||||
// to fail on HEAD
|
||||
accessDenied(() -> readonlyFS.delete(emptyFile, true));
|
||||
} else {
|
||||
|
|
|
@ -154,6 +154,13 @@ public final class OperationCost {
|
|||
public static final OperationCost CREATE_FILE_NO_OVERWRITE =
|
||||
FILE_STATUS_ALL_PROBES;
|
||||
|
||||
/**
|
||||
* S3Guard in non-auth mode always attempts a single file
|
||||
* status call.
|
||||
*/
|
||||
public static final OperationCost S3GUARD_NONAUTH_FILE_STATUS_PROBE =
|
||||
FILE_STATUS_FILE_PROBE;
|
||||
|
||||
/** Expected HEAD count. */
|
||||
private final int head;
|
||||
|
||||
|
|
Loading…
Reference in New Issue