HADOOP-16697. Tune/audit S3A authoritative mode.

Contains:

HADOOP-16474. S3Guard ProgressiveRenameTracker to mark destination
              directory as authoritative on success.
HADOOP-16684. S3guard bucket info to list a bit more about
              authoritative paths.
HADOOP-16722. S3GuardTool to support FilterFileSystem.

This patch improves the marking of newly created/imported directory
trees in S3Guard DynamoDB tables as authoritative.

Specific changes:

 * Renamed directories are marked as authoritative if the entire
   operation succeeded (HADOOP-16474).
 * When parent table entries are updated as part of any table write,
   their authoritative flag is no longer overwritten (see the sketch
   after this list).
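
A minimal sketch of the second point, using the EntryOrigin tagging this
patch adds to DynamoDBMetadataStore (the enclosing class and method here
are illustrative, not part of the patch): ancestor entries which were
retrieved from the table are filtered out of the final batch write, so
whatever authoritative flag they already carry is left untouched.

  // Sketch only; assumed to sit alongside the patch's s3guard classes so
  // that DDBPathMetadata is visible.
  package org.apache.hadoop.fs.s3a.s3guard;

  import java.util.Collection;
  import java.util.Map;
  import java.util.stream.Collectors;

  import org.apache.commons.lang3.tuple.Pair;
  import org.apache.hadoop.fs.Path;

  final class AncestryWriteFilter {

    /** Where an ancestor entry came from. */
    enum EntryOrigin { Requested, Retrieved, Generated }

    private AncestryWriteFilter() {
    }

    /**
     * Keep only the entries which must be written back: those explicitly
     * requested or generated locally. Retrieved entries already exist in
     * the table, so re-writing them would reset their authoritative flag.
     */
    static Collection<DDBPathMetadata> entriesToWrite(
        Map<Path, Pair<EntryOrigin, DDBPathMetadata>> ancestry) {
      return ancestry.values().stream()
          .filter(entry -> entry.getLeft() != EntryOrigin.Retrieved)
          .map(Pair::getRight)
          .collect(Collectors.toList());
    }
  }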

s3guard import changes:

* new -verbose flag to print out what is going on.

* The "s3guard import" command lets you declare that a directory tree
is to be marked as authoritative

  hadoop s3guard import -authoritative -verbose s3a://bucket/path

When importing a listing and a file is found, the import tool queries
the metastore and only updates the entry if the file is different from
before, where "different" means a new timestamp, etag, or length. S3Guard
can see timestamp differences caused purely by clock skew in PUT operations.

As the recursive list performed by the import command doesn't retrieve the
versionID, the existing entry may in fact be more complete.
When updating an existing entry due to clock skew, the existing version ID
is propagated to the new entry (note: the etags must match; this is needed
to deal with inconsistent listings).
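
A minimal sketch of that merge decision, simplified from the new
ImportOperation class (the class and method names below are illustrative,
not the patch's):

  // Sketch only, not the patch's exact code: the merge decision made for a
  // file which already has an entry in the metastore.
  import org.apache.hadoop.fs.s3a.S3AFileStatus;

  final class ImportMergeSketch {

    private ImportMergeSketch() {
    }

    /** @return the entry to write, or null if the existing entry is current. */
    static S3AFileStatus merge(S3AFileStatus child, S3AFileStatus existing) {
      boolean different =
          child.getModificationTime() != existing.getModificationTime()
              || child.getLen() != existing.getLen()
              || existing.getETag() == null
              || !existing.getETag().equals(child.getETag());
      if (!different) {
        // same timestamp, length and etag: nothing to update.
        return null;
      }
      if (existing.getETag() != null
          && existing.getETag().equals(child.getETag())) {
        // the etags match, so a modtime difference is probably clock skew;
        // carry the existing version ID over to the new entry.
        child.setVersionId(existing.getVersionId());
      }
      return child;
    }
  }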

There is a new s3guard command to audit the authoritative state of a
bucket/path:

  hadoop s3guard authoritative -check-config s3a://bucket/path

This is primarily for testing/auditing.
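
The audit logic lives in the new AuthoritativeAuditOperation class, so tests
can drive it directly. A rough sketch of such a call, assuming an
S3AFileSystem fs, its DynamoDBMetadataStore ddbMetastore, and a Path path
are already set up, and that the enclosing test method declares
throws IOException:

  // Sketch: scan the tree, requiring every directory to be authoritative.
  AuthoritativeAuditOperation audit = new AuthoritativeAuditOperation(
      fs.createStoreContext(),   // store context of the owning filesystem
      ddbMetastore,              // the DynamoDB metastore to audit
      true,                      // requireAuthoritative
      true);                     // verbose
  Pair<Integer, Integer> result = audit.audit(fs.qualify(path));
  // left = directories scanned; right = directories not marked authoritative.
  // With requireAuthoritative=true, finding a non-auth directory raises
  // NonAuthoritativeDirException, whose exit code is EXIT_NOT_ACCEPTABLE.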

The s3guard bucket-info command also provides some more details on the
authoritative state of a store (HADOOP-16684).
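
For example (the bucket-info command itself predates this patch; only its
report has been extended):

  hadoop s3guard bucket-info s3a://bucket/

The report now covers whether the metadata store is treated as
authoritative and which authoritative paths, if any, are configured for
the bucket.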

Change-Id: I58001341c04f6f3597fcb4fcb1581ccefeb77d91
Steve Loughran 2020-01-09 18:22:04 +00:00
parent 9da294a140
commit 49df838995
32 changed files with 2585 additions and 363 deletions

View File

@ -204,7 +204,10 @@ public abstract class AbstractContractRenameTest extends
assertPathExists("not created in src/sub dir",
new Path(srcSubDir, "subfile.txt"));
fs.rename(srcDir, finalDir);
boolean rename = fs.rename(srcDir, finalDir);
assertTrue("rename(" + srcDir + ", " + finalDir + ") failed",
rename);
// Accept both POSIX rename behavior and CLI rename behavior
if (renameRemoveEmptyDest) {
// POSIX rename behavior

View File

@ -82,6 +82,15 @@ public abstract class AbstractFSContractTestBase extends Assert
Thread.currentThread().setName("JUnit");
}
@Before
public void nameThread() {
Thread.currentThread().setName("JUnit-" + getMethodName());
}
protected String getMethodName() {
return methodName.getMethodName();
}
/**
* This must be implemented by all instantiated test cases.
* -provide the FS contract
@ -172,6 +181,7 @@ public abstract class AbstractFSContractTestBase extends Assert
*/
@Before
public void setup() throws Exception {
Thread.currentThread().setName("setup");
LOG.debug("== Setup ==");
contract = createContract(createConfiguration());
contract.init();
@ -200,6 +210,7 @@ public abstract class AbstractFSContractTestBase extends Assert
*/
@After
public void teardown() throws Exception {
Thread.currentThread().setName("teardown");
LOG.debug("== Teardown ==");
deleteTestDirInTeardown();
LOG.debug("== Teardown complete ==");

View File

@ -68,5 +68,10 @@
<Method name="openFileWithOptions"/>
<Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
</Match>
<Match>
<Class name="org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo"/>
<Method name="run"/>
<Bug pattern="SF_SWITCH_FALLTHROUGH"/>
</Match>
</FindBugsFilter>

View File

@ -178,6 +178,14 @@ public class S3AFileStatus extends FileStatus {
return versionId;
}
/**
* set the S3 object versionId, else null.
* @param versionId version ID or null.
*/
public void setVersionId(final String versionId) {
this.versionId = versionId;
}
/** Compare if this object is equal to another object.
* @param o the object to be compared.
* @return true if two file status has the same path name; false if not.

View File

@ -1245,7 +1245,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
} catch (AmazonClientException e) {
throw translateException("rename(" + src +", " + dst + ")", src, e);
} catch (RenameFailedException e) {
LOG.debug(e.getMessage());
LOG.info("{}", e.getMessage());
LOG.debug("rename failure", e);
return e.getExitCode();
} catch (FileNotFoundException e) {
LOG.debug(e.toString());
@ -2477,7 +2478,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
* @param path path
* @return true if the path is auth
*/
protected boolean allowAuthoritative(final Path path) {
public boolean allowAuthoritative(final Path path) {
return S3Guard.allowAuthoritative(path, this,
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
}
@ -2720,7 +2721,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
+ " s3modtime={}; msModTime={} updating metastore",
path, s3ModTime, msModTime);
return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
instrumentation, ttlTimeProvider);
ttlTimeProvider);
}
}
}
@ -2755,13 +2756,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
}
// entry was found, save in S3Guard
return S3Guard.putAndReturn(metadataStore, s3FileStatus,
instrumentation, ttlTimeProvider);
ttlTimeProvider);
} else {
// there was no entry in S3Guard
// retrieve the data and update the metadata store in the process.
return S3Guard.putAndReturn(metadataStore,
s3GetFileStatus(path, key, probes, tombstones),
instrumentation,
ttlTimeProvider);
}
}
@ -3177,12 +3177,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
HadoopExecutors.shutdown(unboundedThreadPool, LOG,
THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS);
unboundedThreadPool = null;
closeAutocloseables(LOG, credentials);
cleanupWithLogger(LOG,
metadataStore,
instrumentation,
delegationTokens.orElse(null),
signerManager);
closeAutocloseables(LOG, credentials);
delegationTokens = Optional.empty();
signerManager = null;
credentials = null;
@ -3529,13 +3529,21 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
activeState = stateToClose;
}
S3Guard.addAncestors(metadataStore, p, ttlTimeProvider, activeState);
final boolean isDir = objectRepresentsDirectory(key, length);
S3AFileStatus status = createUploadFileStatus(p,
S3AUtils.objectRepresentsDirectory(key, length), length,
isDir, length,
getDefaultBlockSize(p), username, eTag, versionId);
S3Guard.putAndReturn(metadataStore, status,
instrumentation,
ttlTimeProvider,
activeState);
if (!isDir) {
S3Guard.putAndReturn(metadataStore, status,
ttlTimeProvider,
activeState);
} else {
// this is a directory marker so put it as such.
status.setIsEmptyDirectory(Tristate.TRUE);
S3Guard.putAuthDirectoryMarker(metadataStore, status,
ttlTimeProvider,
activeState);
}
}
} catch (IOException e) {
if (failOnMetadataWriteError) {

View File

@ -25,6 +25,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.s3a.s3guard.MetastoreInstrumentation;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricStringBuilder;
import org.apache.hadoop.metrics2.MetricsCollector;
@ -188,6 +189,7 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
S3GUARD_METADATASTORE_RECORD_WRITES,
S3GUARD_METADATASTORE_RETRY,
S3GUARD_METADATASTORE_THROTTLED,
S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED,
STORE_IO_THROTTLED,
DELEGATION_TOKENS_ISSUED,
FILES_DELETE_REJECTED
@ -562,11 +564,11 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
}
/**
* Create a S3Guard instrumentation instance.
* Create a MetastoreInstrumentation instrumentation instance.
* There's likely to be at most one instance of this per FS instance.
* @return the S3Guard instrumentation point.
*/
public S3GuardInstrumentation getS3GuardInstrumentation() {
public MetastoreInstrumentation getS3GuardInstrumentation() {
return s3GuardInstrumentation;
}
@ -1127,43 +1129,35 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
/**
* Instrumentation exported to S3Guard.
*/
public final class S3GuardInstrumentation {
private final class S3GuardInstrumentation
implements MetastoreInstrumentation {
/** Initialized event. */
@Override
public void initialized() {
incrementCounter(S3GUARD_METADATASTORE_INITIALIZATION, 1);
}
@Override
public void storeClosed() {
}
/**
* Throttled request.
*/
@Override
public void throttled() {
// counters are incremented by owner.
}
/**
* S3Guard is retrying after a (retryable) failure.
*/
@Override
public void retrying() {
// counters are incremented by owner.
}
/**
* Records have been read.
* @param count the number of records read
*/
@Override
public void recordsDeleted(int count) {
incrementCounter(S3GUARD_METADATASTORE_RECORD_DELETES, count);
}
/**
* Records have been read.
* @param count the number of records read
*/
@Override
public void recordsRead(int count) {
incrementCounter(S3GUARD_METADATASTORE_RECORD_READS, count);
}
@ -1172,10 +1166,25 @@ public class S3AInstrumentation implements Closeable, MetricsSource {
* records have been written (including deleted).
* @param count number of records written.
*/
@Override
public void recordsWritten(int count) {
incrementCounter(S3GUARD_METADATASTORE_RECORD_WRITES, count);
}
@Override
public void directoryMarkedAuthoritative() {
incrementCounter(S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED,
1);
}
@Override
public void entryAdded(final long durationNanos) {
addValueToQuantiles(
S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
durationNanos);
incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
}
}
/**

View File

@ -225,6 +225,9 @@ public enum Statistic {
S3GUARD_METADATASTORE_THROTTLE_RATE(
"s3guard_metadatastore_throttle_rate",
"S3Guard metadata store throttle rate"),
S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED(
"s3guard_metadatastore_authoritative_directories_updated",
"S3Guard metadata store authoritative directories updated from S3"),
STORE_IO_THROTTLED("store_io_throttled", "Requests throttled and retried"),

View File

@ -0,0 +1,255 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Queue;
import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.impl.AbstractStoreOperation;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.service.launcher.LauncherExitCodes;
import org.apache.hadoop.util.DurationInfo;
import org.apache.hadoop.util.ExitCodeProvider;
import org.apache.hadoop.util.ExitUtil;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_NOT_ACCEPTABLE;
/**
* Audit a directory tree for being authoritative.
* One aspect of the audit to be aware of: the root directory is
* always considered authoritative, even though, because there is no
* matching entry in any of the stores, it is not strictly true.
*/
public class AuthoritativeAuditOperation extends AbstractStoreOperation {
private static final Logger LOG = LoggerFactory.getLogger(
AuthoritativeAuditOperation.class);
/**
* Exception error code when a path is non-auth in the DB.
*/
public static final int ERROR_ENTRY_NOT_AUTH_IN_DDB = EXIT_NOT_ACCEPTABLE;
/**
* Exception error code when a path is not configured to be
* auth in the S3A FS Config: {@value}.
*/
public static final int ERROR_PATH_NOT_AUTH_IN_FS = 5;
/**
* Exception error string: {@value}.
*/
public static final String E_NONAUTH
= "Directory is not marked as authoritative in the S3Guard store";
/** The metastore to audit. */
private final DynamoDBMetadataStore metastore;
/** require all directories to be authoritative. */
private final boolean requireAuthoritative;
/**
* Verbose switch.
*/
private final boolean verbose;
/**
* Constructor.
* @param storeContext store context.
* @param metastore metastore
* @param requireAuthoritative require all directories to be authoritative
* @param verbose verbose output
*/
public AuthoritativeAuditOperation(
final StoreContext storeContext,
final DynamoDBMetadataStore metastore,
final boolean requireAuthoritative,
final boolean verbose) {
super(storeContext);
this.metastore = metastore;
this.requireAuthoritative = requireAuthoritative;
this.verbose = verbose;
}
/**
* Examine the path metadata and verify that the dir is authoritative.
* @param md metadata.
* @param requireAuth require all directories to be authoritative
* @throws NonAuthoritativeDirException if it is !auth and requireAuth=true.
*/
private void verifyAuthDir(final DDBPathMetadata md,
final boolean requireAuth)
throws PathIOException {
final Path path = md.getFileStatus().getPath();
boolean isAuth = path.isRoot() || md.isAuthoritativeDir();
if (!isAuth && requireAuth) {
throw new NonAuthoritativeDirException(path);
}
}
/**
* Examine the path metadata, declare whether it should be queued for
* recursive scanning.
* @param md metadata.
* @return true if it is a dir to scan.
*/
private boolean isDirectory(PathMetadata md) {
return !md.getFileStatus().isFile();
}
/**
* Audit the tree.
* @param path qualified path to scan
* @return tuple(dirs scanned, nonauth dirs found)
* @throws IOException IO failure
* @throws ExitUtil.ExitException if a non-auth dir was found.
*/
public Pair<Integer, Integer> audit(Path path) throws IOException {
try (DurationInfo ignored =
new DurationInfo(LOG, "Audit %s", path)) {
return executeAudit(path, requireAuthoritative, true);
}
}
/**
* Audit the tree.
* This is the internal code which throws a NonAuthoritativePathException
* on failures; tests may use it.
* @param path path to scan
* @param requireAuth require all directories to be authoritative
* @param recursive recurse?
* @return tuple(dirs scanned, nonauth dirs found)
* @throws IOException IO failure
* @throws NonAuthoritativeDirException if a non-auth dir was found.
*/
@VisibleForTesting
Pair<Integer, Integer> executeAudit(
final Path path,
final boolean requireAuth,
final boolean recursive) throws IOException {
int dirs = 0;
int nonauth = 0;
final Queue<DDBPathMetadata> queue = new ArrayDeque<>();
final boolean isRoot = path.isRoot();
final DDBPathMetadata baseData = metastore.get(path);
if (baseData == null) {
throw new ExitUtil.ExitException(LauncherExitCodes.EXIT_NOT_FOUND,
"No S3Guard entry for path " + path);
}
if (isRoot || isDirectory(baseData)) {
// we have the root entry or a directory
queue.add(baseData);
} else {
LOG.info("Path represents file");
return Pair.of(0, 0);
}
while (!queue.isEmpty()) {
dirs++;
final DDBPathMetadata dir = queue.poll();
final Path p = dir.getFileStatus().getPath();
LOG.debug("Directory {}", dir.prettyPrint());
// log a message about the dir state, with root treated specially
if (!p.isRoot()) {
if (!dir.isAuthoritativeDir()) {
LOG.warn("Directory {} is not authoritative", p);
nonauth++;
verifyAuthDir(dir, requireAuth);
} else {
LOG.info("Directory {}", p);
}
} else {
// this is done to avoid the confusing message about root not being
// authoritative
LOG.info("Root directory {}", p);
}
// list its children
if (recursive) {
final DirListingMetadata entry = metastore.listChildren(p);
if (entry != null) {
final Collection<PathMetadata> listing = entry.getListing();
int files = 0, subdirs = 0;
for (PathMetadata e : listing) {
if (isDirectory(e)) {
// queue for auditing
queue.add((DDBPathMetadata) e);
subdirs++;
} else {
files++;
}
}
if (verbose && files > 0 || subdirs > 0) {
LOG.info(" files {}; directories {}", files, subdirs);
}
} else {
LOG.info("Directory {} has been deleted", dir);
}
}
}
// end of scan
if (dirs == 1 && isRoot) {
LOG.info("The store has no directories to scan");
} else {
LOG.info("Scanned {} directories - {} were not marked as authoritative",
dirs, nonauth);
}
return Pair.of(dirs, nonauth);
}
/**
* A directory was found which was non-authoritative.
* The exit code for this operation is
* {@link LauncherExitCodes#EXIT_NOT_ACCEPTABLE}; this is what the s3guard
* command will return.
*/
public static final class NonAuthoritativeDirException
extends PathIOException implements ExitCodeProvider {
/**
* Instantiate.
* @param path the path which is non-authoritative.
*/
private NonAuthoritativeDirException(final Path path) {
super(path.toString(), E_NONAUTH);
}
@Override
public int getExitCode() {
return ERROR_ENTRY_NOT_AUTH_IN_DDB;
}
@Override
public String toString() {
return getMessage();
}
}
}

View File

@ -74,7 +74,11 @@ public class BulkOperationState implements Closeable {
public enum OperationType {
/** Writing data. */
Put,
/** Rename: add and delete. */
/**
* Rename: add and delete.
* After the rename, the tree under the destination path
* can be tagged as authoritative.
*/
Rename,
/** Pruning: deleting entries and updating parents. */
Prune,
@ -83,6 +87,16 @@ public class BulkOperationState implements Closeable {
/** Deletion operation. */
Delete,
/** FSCK operation. */
Fsck
Fsck,
/**
* Bulk directory tree import.
* After an import, the entire tree under the path has been
* enumerated and should be tagged as authoritative.
*/
Import,
/**
* Listing update.
*/
Listing,
}
}

View File

@ -54,6 +54,7 @@ import com.amazonaws.services.dynamodbv2.document.QueryOutcome;
import com.amazonaws.services.dynamodbv2.document.ScanOutcome;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
import com.amazonaws.services.dynamodbv2.document.internal.IteratorSupport;
import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec;
import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;
@ -69,6 +70,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -77,6 +79,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.impl.FunctionsRaisingIOE;
import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
import org.apache.hadoop.fs.s3a.AWSServiceThrottledException;
import org.apache.hadoop.fs.s3a.Constants;
@ -84,7 +87,6 @@ import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.fs.s3a.auth.RoleModel;
@ -105,6 +107,7 @@ import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3GuardClientOpera
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletion;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
import static org.apache.hadoop.fs.s3a.s3guard.PathOrderComparators.TOPMOST_PM_LAST;
import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*;
/**
@ -299,7 +302,12 @@ public class DynamoDBMetadataStore implements MetadataStore,
*/
private RetryPolicy batchWriteRetryPolicy;
private S3AInstrumentation.S3GuardInstrumentation instrumentation;
/**
* The instrumentation is never null; if/when bound to an owner file system,
* that filesystem's statistics will be updated as appropriate.
*/
private MetastoreInstrumentation instrumentation
= new MetastoreInstrumentationImpl();
/** Owner FS: only valid if configured with an owner FS. */
private S3AFileSystem owner;
@ -385,7 +393,8 @@ public class DynamoDBMetadataStore implements MetadataStore,
throws IOException {
Preconditions.checkNotNull(fs, "Null filesystem");
Preconditions.checkArgument(fs instanceof S3AFileSystem,
"DynamoDBMetadataStore only supports S3A filesystem.");
"DynamoDBMetadataStore only supports S3A filesystem - not %s",
fs);
bindToOwnerFilesystem((S3AFileSystem) fs);
final String bucket = owner.getBucket();
String confRegion = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
@ -736,9 +745,10 @@ public class DynamoDBMetadataStore implements MetadataStore,
tableName, region, path, meta);
}
if (wantEmptyDirectoryFlag && meta != null) {
if (wantEmptyDirectoryFlag && meta != null && !meta.isDeleted()) {
final FileStatus status = meta.getFileStatus();
// for directory, we query its direct children to determine isEmpty bit
// for a non-deleted directory, we query its direct undeleted children
// to determine the isEmpty bit. There's no TTL checking going on here.
if (status.isDirectory()) {
final QuerySpec spec = new QuerySpec()
.withHashKey(pathToParentKeyAttribute(path))
@ -748,11 +758,27 @@ public class DynamoDBMetadataStore implements MetadataStore,
boolean hasChildren = readOp.retry("get/hasChildren",
path.toString(),
true,
() -> table.query(spec).iterator().hasNext());
() -> {
// issue the query
final IteratorSupport<Item, QueryOutcome> it = table.query(
spec).iterator();
// if non empty, log the result to aid with some debugging
if (it.hasNext()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Dir {} is non-empty", status.getPath());
while(it.hasNext()) {
LOG.debug("{}", itemToPathMetadata(it.next(), username));
}
}
return true;
} else {
return false;
}
});
// If directory is authoritative, we can set the empty directory flag
// to TRUE or FALSE. Otherwise FALSE, or UNKNOWN.
if(meta.isAuthoritativeDir()) {
if (meta.isAuthoritativeDir()) {
meta.setIsEmptyDirectory(
hasChildren ? Tristate.FALSE : Tristate.TRUE);
} else {
@ -838,6 +864,18 @@ public class DynamoDBMetadataStore implements MetadataStore,
dirPathMeta.getLastUpdated());
}
/**
* Origin of entries in the ancestor map built up in
* {@link #completeAncestry(Collection, AncestorState)}.
* This is done to stop generated ancestor entries from overwriting those
* in the store, while allowing those requested in the API call to do this.
*/
private enum EntryOrigin {
Requested, // requested in method call
Retrieved, // retrieved from DDB: do not resubmit
Generated // generated ancestor.
}
/**
* Build the list of all parent entries.
* <p>
@ -850,9 +888,9 @@ public class DynamoDBMetadataStore implements MetadataStore,
*/
private Collection<DDBPathMetadata> completeAncestry(
final Collection<DDBPathMetadata> pathsToCreate,
final AncestorState ancestorState) throws PathIOException {
final AncestorState ancestorState) throws IOException {
// Key on path to allow fast lookup
Map<Path, DDBPathMetadata> ancestry = new HashMap<>();
Map<Path, Pair<EntryOrigin, DDBPathMetadata>> ancestry = new HashMap<>();
LOG.debug("Completing ancestry for {} paths", pathsToCreate.size());
// we sort the inputs to guarantee that the topmost entries come first.
// that way if the put request contains both parents and children
@ -892,23 +930,48 @@ public class DynamoDBMetadataStore implements MetadataStore,
path, entry);
}
}
ancestry.put(path, entry);
// add the entry to the ancestry map as an explicitly requested entry.
ancestry.put(path, Pair.of(EntryOrigin.Requested, entry));
Path parent = path.getParent();
while (!parent.isRoot() && !ancestry.containsKey(parent)) {
if (!ancestorState.findEntry(parent, true)) {
// don't add this entry, but carry on with the parents
LOG.debug("auto-create ancestor path {} for child path {}",
parent, path);
final S3AFileStatus status = makeDirStatus(parent, username);
DDBPathMetadata md = new DDBPathMetadata(status, Tristate.FALSE,
false, false, ttlTimeProvider.getNow());
// there is no entry in the ancestor state.
// look in the store
DDBPathMetadata md;
Pair<EntryOrigin, DDBPathMetadata> newEntry;
final Item item = getConsistentItem(parent);
if (item != null && !itemToPathMetadata(item, username).isDeleted()) {
// This is an undeleted entry found in the database.
// register it in ancestor state and in the map of entries to create
// as a retrieved entry
md = itemToPathMetadata(item, username);
LOG.debug("Found existing entry for parent: {}", md);
newEntry = Pair.of(EntryOrigin.Retrieved, md);
} else {
// A directory entry was not found in the DB. Create one.
LOG.debug("auto-create ancestor path {} for child path {}",
parent, path);
final S3AFileStatus status = makeDirStatus(parent, username);
md = new DDBPathMetadata(status, Tristate.FALSE,
false, false, ttlTimeProvider.getNow());
// declare to be a generated entry
newEntry = Pair.of(EntryOrigin.Generated, md);
}
// insert into the ancestor state to avoid further checks
ancestorState.put(parent, md);
ancestry.put(parent, md);
ancestry.put(parent, newEntry);
}
parent = parent.getParent();
}
}
return ancestry.values();
// we now have a list of entries which were not in the operation state.
// Filter out those which were retrieved, to produce a list of those
// which must be written to the database.
// TODO sort in reverse order of existence
return ancestry.values().stream()
.filter(p -> p.getLeft() != EntryOrigin.Retrieved)
.map(Pair::getRight)
.collect(Collectors.toList());
}
/**
@ -939,7 +1002,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
Collection<DDBPathMetadata> newDirs = new ArrayList<>();
final AncestorState ancestorState = extractOrCreate(operationState,
BulkOperationState.OperationType.Rename);
BulkOperationState.OperationType.Put);
Path parent = qualifiedPath.getParent();
boolean entryFound = false;
@ -1066,7 +1129,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
tombstones.add(new DDBPathMetadata(pmTombstone));
}
// sort all the tombstones lowest first.
tombstones.sort(PathOrderComparators.TOPMOST_PM_LAST);
tombstones.sort(TOPMOST_PM_LAST);
newItems.addAll(tombstones);
}
@ -1350,6 +1413,20 @@ public class DynamoDBMetadataStore implements MetadataStore,
return true;
}
/**
* Get the value of an optional boolean attribute, falling back to the
* default value if the attribute is absent.
* @param item Item
* @param attrName Attribute name
* @param defVal Default value
* @return The value or the default
*/
private static boolean getBoolAttribute(Item item,
String attrName,
boolean defVal) {
return item.hasAttribute(attrName) ? item.getBoolean(attrName) : defVal;
}
/** Create a directory FileStatus using 0 for the lastUpdated time. */
static S3AFileStatus makeDirStatus(Path f, String owner) {
return new S3AFileStatus(Tristate.UNKNOWN, f, owner);
@ -1371,7 +1448,6 @@ public class DynamoDBMetadataStore implements MetadataStore,
final DirListingMetadata meta,
@Nullable final BulkOperationState operationState) throws IOException {
LOG.debug("Saving {} dir meta for {} to table {} in region {}: {}",
tableName,
meta.isAuthoritative() ? "auth" : "nonauth",
meta.getPath(),
tableName, region, meta);
@ -1404,9 +1480,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
@Override
public synchronized void close() {
if (instrumentation != null) {
instrumentation.storeClosed();
}
instrumentation.storeClosed();
try {
if (dynamoDB != null) {
LOG.debug("Shutting down {}", this);
@ -1435,18 +1509,27 @@ public class DynamoDBMetadataStore implements MetadataStore,
switch (pruneMode) {
case ALL_BY_MODTIME:
// filter all files under the given parent older than the modtime.
// this implicitly skips directories, because they lack a modtime field.
// however we explicitly exclude directories to make clear that
// directories are to be excluded and avoid any confusion
// see: HADOOP-16725.
// note: files lack the is_dir field entirely, so we use a `not` to
// filter out the directories.
filterExpression =
"mod_time < :mod_time and begins_with(parent, :parent)";
"mod_time < :mod_time and begins_with(parent, :parent)"
+ " and not is_dir = :is_dir";
projectionExpression = "parent,child";
map = new ValueMap()
.withLong(":mod_time", cutoff)
.withString(":parent", keyPrefix);
.withString(":parent", keyPrefix)
.withBoolean(":is_dir", true);
break;
case TOMBSTONES_BY_LASTUPDATED:
filterExpression =
"last_updated < :last_updated and begins_with(parent, :parent) "
+ "and is_deleted = :is_deleted";
projectionExpression = "parent,child";
projectionExpression = "parent,child,is_deleted";
map = new ValueMap()
.withLong(":last_updated", cutoff)
.withString(":parent", keyPrefix)
@ -1471,7 +1554,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
}
/**
* Prune files, in batches. There's a sleep between each batch.
* Prune files, in batches. There's optionally a sleep between each batch.
*
* @param pruneMode The mode of operation for the prune For details see
* {@link MetadataStore#prune(PruneMode, long)}
@ -1479,10 +1562,11 @@ public class DynamoDBMetadataStore implements MetadataStore,
* @param keyPrefix The prefix for the keys that should be removed
* @throws IOException Any IO/DDB failure.
* @throws InterruptedIOException if the prune was interrupted
* @return count of pruned items.
*/
@Override
@Retries.RetryTranslated
public void prune(PruneMode pruneMode, long cutoff, String keyPrefix)
public long prune(PruneMode pruneMode, long cutoff, String keyPrefix)
throws IOException {
LOG.debug("Prune {} under {} with age {}",
pruneMode == PruneMode.ALL_BY_MODTIME
@ -1490,10 +1574,24 @@ public class DynamoDBMetadataStore implements MetadataStore,
keyPrefix, cutoff);
final ItemCollection<ScanOutcome> items =
expiredFiles(pruneMode, cutoff, keyPrefix);
innerPrune(keyPrefix, items);
return innerPrune(pruneMode, cutoff, keyPrefix, items);
}
private void innerPrune(String keyPrefix, ItemCollection<ScanOutcome> items)
/**
* Prune files, in batches. There's optionally a sleep between each batch.
*
* @param pruneMode The mode of operation for the prune For details see
* {@link MetadataStore#prune(PruneMode, long)}
* @param cutoff Oldest modification time to allow
* @param keyPrefix The prefix for the keys that should be removed
* @param items expired items
* @return count of pruned items.
* @throws IOException Any IO/DDB failure.
* @throws InterruptedIOException if the prune was interrupted
*/
private int innerPrune(
final PruneMode pruneMode, final long cutoff, final String keyPrefix,
final ItemCollection<ScanOutcome> items)
throws IOException {
int itemCount = 0;
try (AncestorState state = initiateBulkWrite(
@ -1508,6 +1606,22 @@ public class DynamoDBMetadataStore implements MetadataStore,
TimeUnit.MILLISECONDS);
Set<Path> parentPathSet = new HashSet<>();
Set<Path> clearedParentPathSet = new HashSet<>();
// declare the operation to delete a batch as a function so
// as to keep the code consistent across multiple uses.
FunctionsRaisingIOE.CallableRaisingIOE<Void> deleteBatchOperation =
() -> {
// lowest path entries get deleted first.
deletionBatch.sort(PathOrderComparators.TOPMOST_PATH_LAST);
processBatchWriteRequest(state, pathToKey(deletionBatch), null);
// set authoritative false for each pruned dir listing
// if at least one entry was not a tombstone
removeAuthoritativeDirFlag(parentPathSet, state);
// already cleared parent paths.
clearedParentPathSet.addAll(parentPathSet);
parentPathSet.clear();
return null;
};
for (Item item : items) {
DDBPathMetadata md = PathMetadataDynamoDBTranslation
.itemToPathMetadata(item, username);
@ -1524,22 +1638,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
Path parentPath = path.getParent();
if (!tombstone
&& parentPath != null
&& !parentPath.isRoot()
&& !clearedParentPathSet.contains(parentPath)) {
parentPathSet.add(parentPath);
}
itemCount++;
if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) {
// lowest path entries get deleted first.
deletionBatch.sort(PathOrderComparators.TOPMOST_PATH_LAST);
processBatchWriteRequest(state, pathToKey(deletionBatch), null);
// set authoritative false for each pruned dir listing
removeAuthoritativeDirFlag(parentPathSet, state);
// already cleared parent paths.
clearedParentPathSet.addAll(parentPathSet);
parentPathSet.clear();
deleteBatchOperation.apply();
deletionBatch.clear();
if (delay > 0) {
Thread.sleep(delay);
@ -1548,11 +1654,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
}
// final batch of deletes
if (!deletionBatch.isEmpty()) {
processBatchWriteRequest(state, pathToKey(deletionBatch), null);
// set authoritative false for each pruned dir listing
removeAuthoritativeDirFlag(parentPathSet, state);
parentPathSet.clear();
deleteBatchOperation.apply();
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
@ -1563,6 +1665,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
}
LOG.info("Finished pruning {} items in batches of {}", itemCount,
S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
return itemCount;
}
/**
@ -1597,6 +1700,10 @@ public class DynamoDBMetadataStore implements MetadataStore,
Set<DDBPathMetadata> metas = pathSet.stream().map(path -> {
try {
if (path.isRoot()) {
LOG.debug("ignoring root path");
return null;
}
if (state != null && state.get(path) != null) {
// there's already an entry for this path
LOG.debug("Ignoring update of entry already in the state map");
@ -1620,6 +1727,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
}
LOG.debug("Setting isAuthoritativeDir==false on {}", ddbPathMetadata);
ddbPathMetadata.setAuthoritativeDir(false);
ddbPathMetadata.setLastUpdated(ttlTimeProvider.getNow());
return ddbPathMetadata;
} catch (IOException e) {
String msg = String.format("IOException while getting PathMetadata "
@ -1879,9 +1987,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
boolean idempotent) {
if (S3AUtils.isThrottleException(ex)) {
// throttled
if (instrumentation != null) {
instrumentation.throttled();
}
instrumentation.throttled();
int eventCount = throttleEventCount.addAndGet(1);
if (attempts == 1 && eventCount < THROTTLE_EVENT_LOG_LIMIT) {
LOG.warn("DynamoDB IO limits reached in {};"
@ -1898,10 +2004,8 @@ public class DynamoDBMetadataStore implements MetadataStore,
LOG.debug("Retrying {}", text, ex);
}
if (instrumentation != null) {
// note a retry
instrumentation.retrying();
}
// note a retry
instrumentation.retrying();
if (owner != null) {
owner.metastoreOperationRetried(ex, attempts, idempotent);
}
@ -1940,9 +2044,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
* @param count count of records.
*/
private void recordsWritten(final int count) {
if (instrumentation != null) {
instrumentation.recordsWritten(count);
}
instrumentation.recordsWritten(count);
}
/**
@ -1950,18 +2052,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
* @param count count of records.
*/
private void recordsRead(final int count) {
if (instrumentation != null) {
instrumentation.recordsRead(count);
}
instrumentation.recordsRead(count);
}
/**
* Record the number of records deleted.
* @param count count of records.
*/
private void recordsDeleted(final int count) {
if (instrumentation != null) {
instrumentation.recordsDeleted(count);
}
instrumentation.recordsDeleted(count);
}
/**
@ -1983,6 +2081,62 @@ public class DynamoDBMetadataStore implements MetadataStore,
new AncestorState(this, BulkOperationState.OperationType.Rename, dest));
}
/**
* Mark the directories instantiated under the destination path
* as authoritative. That is: all entries in the
* operationState (which must be an AncestorState instance),
* that are under the destination path.
*
* The database update synchronized on the operationState, so all other
* threads trying to update that state will be blocked until completion.
*
* This operation is only used in import and at the end of a rename,
* so this is not considered an issue.
* @param dest destination path.
* @param operationState active state.
* @throws IOException failure.
* @return the number of directories marked.
*/
@Override
public int markAsAuthoritative(
final Path dest,
final BulkOperationState operationState) throws IOException {
if (operationState == null) {
return 0;
}
Preconditions.checkArgument(operationState instanceof AncestorState,
"Not an AncestorState %s", operationState);
final AncestorState state = (AncestorState)operationState;
// only mark paths under the dest as auth
final String simpleDestKey = pathToParentKey(dest);
final String destPathKey = simpleDestKey + "/";
final String opId = AncestorState.stateAsString(state);
LOG.debug("{}: marking directories under {} as authoritative",
opId, destPathKey);
// the list of dirs to build up.
final List<DDBPathMetadata> dirsToUpdate = new ArrayList<>();
synchronized (state) {
for (Map.Entry<Path, DDBPathMetadata> entry :
state.getAncestry().entrySet()) {
final Path path = entry.getKey();
final DDBPathMetadata md = entry.getValue();
final String key = pathToParentKey(path);
if (md.getFileStatus().isDirectory()
&& (key.equals(simpleDestKey) || key.startsWith(destPathKey))) {
// the updated entry is under the destination.
md.setAuthoritativeDir(true);
md.setLastUpdated(ttlTimeProvider.getNow());
LOG.debug("{}: added {}", opId, key);
dirsToUpdate.add(md);
}
}
processBatchWriteRequest(state,
null, pathMetadataToItem(dirsToUpdate));
}
return dirsToUpdate.size();
}
@Override
public AncestorState initiateBulkWrite(
final BulkOperationState.OperationType operation,
@ -2016,10 +2170,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
String stateStr = AncestorState.stateAsString(state);
for (Item item : items) {
boolean tombstone = !itemExists(item);
OPERATIONS_LOG.debug("{} {} {}",
boolean isDir = getBoolAttribute(item, IS_DIR, false);
boolean auth = getBoolAttribute(item, IS_AUTHORITATIVE, false);
OPERATIONS_LOG.debug("{} {} {}{}{}",
stateStr,
tombstone ? "TOMBSTONE" : "PUT",
itemPrimaryKeyToString(item));
itemPrimaryKeyToString(item),
auth ? " [auth]" : "",
isDir ? " directory" : "");
}
}
}
@ -2084,11 +2242,18 @@ public class DynamoDBMetadataStore implements MetadataStore,
}
}
@Override
public MetastoreInstrumentation getInstrumentation() {
return instrumentation;
}
/**
* This tracks all the ancestors created,
* across multiple move/write operations.
* This is to avoid duplicate creation of ancestors during bulk commits
* and rename operations managed by a rename tracker.
*
* There is no thread safety: callers must synchronize as appropriate.
*/
@VisibleForTesting
static final class AncestorState extends BulkOperationState {
@ -2135,6 +2300,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
return ancestry.size();
}
/**
* Get the ancestry. Not thread safe.
* @return the map of ancestors.
*/
Map<Path, DDBPathMetadata> getAncestry() {
return ancestry;
}
public Path getDest() {
return dest;
}

View File

@ -0,0 +1,272 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.impl.ExecutingStoreOperation;
import org.apache.hadoop.util.DurationInfo;
/**
* Import a directory tree into the metastore.
* This code was moved from S3GuardTool and enhanced to mark
* the destination tree as authoritative.
*/
class ImportOperation extends ExecutingStoreOperation<Long> {
private static final Logger LOG = LoggerFactory.getLogger(
ImportOperation.class);
/**
* Source file system: must not be guarded.
*/
private final S3AFileSystem filesystem;
/**
* Destination metadata store.
*/
private final MetadataStore store;
/**
* Source entry: File or directory.
*/
private final S3AFileStatus status;
/**
* If importing the directory tree, should it be marked
* authoritative afterwards?
*/
private final boolean authoritative;
private final boolean verbose;
/**
* For DDB the BulkOperation tracking eliminates the need for this cache,
* but it is retained here for the local store and to allow for
* ease of moving to operations which may update the store in parallel with
* writing.
*/
private final Set<Path> dirCache = new HashSet<>();
/**
* Import.
* @param filesystem Unguarded FS to scan.
* @param store store to update
* @param status source status
* @param authoritative should the imported tree be marked as authoritative
* @param verbose Verbose output
*/
ImportOperation(final S3AFileSystem filesystem,
final MetadataStore store,
final S3AFileStatus status,
final boolean authoritative,
final boolean verbose) {
super(filesystem.createStoreContext());
this.verbose = verbose;
Preconditions.checkState(!filesystem.hasMetadataStore(),
"Source filesystem for import has a metadata store");
this.filesystem = filesystem;
this.store = store;
this.status = status;
this.authoritative = authoritative;
}
private S3AFileSystem getFilesystem() {
return filesystem;
}
private MetadataStore getStore() {
return store;
}
private FileStatus getStatus() {
return status;
}
@Override
public Long execute() throws IOException {
final long items;
if (status.isFile()) {
PathMetadata meta = new PathMetadata(status);
getStore().put(meta, null);
items = 1;
} else {
try (DurationInfo ignored =
new DurationInfo(LOG, "Importing %s", getStatus().getPath())) {
items = importDir();
}
}
return items;
}
/**
* Recursively import every path under path.
* @return number of items inserted into MetadataStore
* @throws IOException on I/O errors.
*/
private long importDir() throws IOException {
Preconditions.checkArgument(status.isDirectory());
long totalCountOfEntriesWritten = 0;
final Path basePath = status.getPath();
final MetadataStore ms = getStore();
LOG.info("Importing directory {}", basePath);
try (BulkOperationState operationState = ms
.initiateBulkWrite(
BulkOperationState.OperationType.Import,
basePath)) {
long countOfFilesWritten = 0;
long countOfDirsWritten = 0;
RemoteIterator<S3ALocatedFileStatus> it = getFilesystem()
.listFilesAndEmptyDirectories(basePath, true);
while (it.hasNext()) {
S3ALocatedFileStatus located = it.next();
S3AFileStatus child;
final Path path = located.getPath();
final boolean isDirectory = located.isDirectory();
if (isDirectory) {
child = DynamoDBMetadataStore.makeDirStatus(path,
located.getOwner());
dirCache.add(path);
// and update the dir count
countOfDirsWritten++;
} else {
child = located.toS3AFileStatus();
}
int parentsWritten = putParentsIfNotPresent(child, operationState);
LOG.debug("Wrote {} parent entries", parentsWritten);
// We don't blindly overwrite any existing file entry in S3Guard with a
// new one, because that may lose the version information;
// instead we merge them.
if (!isDirectory) {
final PathMetadata existingEntry = S3Guard.getWithTtl(ms, path, null,
false, true);
if (existingEntry != null) {
final S3AFileStatus existingStatus = existingEntry.getFileStatus();
if (existingStatus.isFile()) {
// source is also a file.
// we only worry about an update if the timestamp is different,
final String existingEtag = existingStatus.getETag();
final String childEtag = child.getETag();
if (child.getModificationTime()
!= existingStatus.getModificationTime()
|| existingStatus.getLen() != child.getLen()
|| existingEtag == null
|| !existingEtag.equals(childEtag)) {
// files are potentially different, though a modtime change
// can just be a clock skew problem
// so if the etag is unchanged, we propagate any versionID
if (childEtag.equals(existingEtag)) {
// copy over any version ID.
child.setVersionId(existingStatus.getVersionId());
}
} else {
// the entry modtimes match
child = null;
}
}
}
if (child != null) {
countOfFilesWritten++;
}
}
if (child != null) {
// there's an entry to add.
// log entry spaced to same width
String t = isDirectory ? "Dir " : "File";
if (verbose) {
LOG.info("{} {}", t, path);
} else {
LOG.debug("{} {}", t, path);
}
S3Guard.putWithTtl(
ms,
new PathMetadata(child),
getFilesystem().getTtlTimeProvider(),
operationState);
totalCountOfEntriesWritten++;
}
}
LOG.info("Updated S3Guard with {} files and {} directory entries",
countOfFilesWritten, countOfDirsWritten);
// here all entries are imported.
// tell the store that everything should be marked as auth
if (authoritative) {
LOG.info("Marking directory tree {} as authoritative",
basePath);
ms.markAsAuthoritative(basePath, operationState);
}
}
return totalCountOfEntriesWritten;
}
/**
* Put parents into metastore and cache if the parents are not present.
*
* There's duplication here with S3Guard DDB ancestor state, but this
* is designed to work across implementations.
* @param fileStatus the file or an empty directory.
* @param operationState store's bulk update state.
* @return number of entries written.
* @throws IOException on I/O errors.
*/
private int putParentsIfNotPresent(FileStatus fileStatus,
@Nullable BulkOperationState operationState) throws IOException {
Preconditions.checkNotNull(fileStatus);
Path parent = fileStatus.getPath().getParent();
int count = 0;
while (parent != null) {
if (dirCache.contains(parent)) {
return count;
}
final ITtlTimeProvider timeProvider
= getFilesystem().getTtlTimeProvider();
final PathMetadata pmd = S3Guard.getWithTtl(getStore(), parent,
timeProvider, false, true);
if (pmd == null || pmd.isDeleted()) {
S3AFileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
fileStatus.getOwner());
S3Guard.putWithTtl(getStore(), new PathMetadata(dir),
timeProvider,
operationState);
count++;
}
dirCache.add(parent);
parent = parent.getParent();
}
return count;
}
}

View File

@ -47,6 +47,7 @@ import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import static org.apache.hadoop.fs.s3a.Constants.*;
@ -380,15 +381,19 @@ public class LocalMetadataStore implements MetadataStore {
}
@Override
public synchronized void prune(PruneMode pruneMode, long cutoff,
public synchronized long prune(PruneMode pruneMode, long cutoff,
String keyPrefix) {
// prune files
AtomicLong count = new AtomicLong();
// filter path_metadata (files), filter expired, remove expired
localCache.asMap().entrySet().stream()
.filter(entry -> entry.getValue().hasPathMeta())
.filter(entry -> expired(pruneMode,
entry.getValue().getFileMeta(), cutoff, keyPrefix))
.forEach(entry -> localCache.invalidate(entry.getKey()));
.forEach(entry -> {
localCache.invalidate(entry.getKey());
count.incrementAndGet();
});
// prune dirs
@ -404,10 +409,13 @@ public class LocalMetadataStore implements MetadataStore {
for (PathMetadata child : oldChildren) {
if (!expired(pruneMode, child, cutoff, keyPrefix)) {
newChildren.add(child);
} else {
count.incrementAndGet();
}
}
removeAuthoritativeFromParent(path, oldChildren, newChildren);
});
return count.get();
}
private void removeAuthoritativeFromParent(Path path,

View File

@ -328,12 +328,13 @@ public interface MetadataStore extends Closeable {
* additional keyPrefix parameter to filter the pruned keys with a prefix.
*
* @param pruneMode Prune Mode
* @param cutoff Oldest time to allow (UTC)
* @param cutoff Oldest time in milliseconds to allow (UTC)
* @param keyPrefix The prefix for the keys that should be removed
* @throws IOException if there is an error
* @throws UnsupportedOperationException if not implemented
* @return the number of pruned entries
*/
void prune(PruneMode pruneMode, long cutoff, String keyPrefix)
long prune(PruneMode pruneMode, long cutoff, String keyPrefix)
throws IOException, UnsupportedOperationException;
/**
@ -352,6 +353,23 @@ public interface MetadataStore extends Closeable {
*/
void updateParameters(Map<String, String> parameters) throws IOException;
/**
* Mark all directories created/touched in an operation as authoritative.
* The metastore can now update that path with any authoritative
* flags it chooses.
* The store may assume that therefore the operation state is complete.
* This holds for rename and needs to be documented for import.
* @param dest destination path.
* @param operationState active state.
* @throws IOException failure.
* @return the number of directories marked.
*/
default int markAsAuthoritative(Path dest,
BulkOperationState operationState)
throws IOException {
return 0;
}
/**
* Modes of operation for prune.
* For details see {@link MetadataStore#prune(PruneMode, long)}
@ -389,7 +407,7 @@ public interface MetadataStore extends Closeable {
default BulkOperationState initiateBulkWrite(
BulkOperationState.OperationType operation,
Path dest) throws IOException {
return null;
return new BulkOperationState(operation);
}
/**
@ -401,4 +419,11 @@ public interface MetadataStore extends Closeable {
*/
void setTtlTimeProvider(ITtlTimeProvider ttlTimeProvider);
/**
* Get any instrumentation for this store; it must not be null.
* @return any store instrumentation.
*/
default MetastoreInstrumentation getInstrumentation() {
return new MetastoreInstrumentationImpl();
}
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
/**
* Instrumentation exported to S3Guard.
*/
public interface MetastoreInstrumentation {
/** Initialized event. */
void initialized();
/** Store has been closed. */
void storeClosed();
/**
* Throttled request.
*/
void throttled();
/**
* S3Guard is retrying after a (retryable) failure.
*/
void retrying();
/**
* Records have been deleted.
* @param count the number of records deleted.
*/
void recordsDeleted(int count);
/**
* Records have been read.
* @param count the number of records read
*/
void recordsRead(int count);
/**
* records have been written (including tombstones).
* @param count number of records written.
*/
void recordsWritten(int count);
/**
* A directory has been tagged as authoritative.
*/
void directoryMarkedAuthoritative();
/**
* An entry was added.
* @param durationNanos time to add
*/
void entryAdded(long durationNanos);
}

View File

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
/**
* A no-op implementation of {@link MetastoreInstrumentation}
* which allows metastores to always return an instance
* when requested.
*/
public class MetastoreInstrumentationImpl implements MetastoreInstrumentation {
@Override
public void initialized() {
}
@Override
public void storeClosed() {
}
@Override
public void throttled() {
}
@Override
public void retrying() {
}
@Override
public void recordsDeleted(final int count) {
}
@Override
public void recordsRead(final int count) {
}
@Override
public void recordsWritten(final int count) {
}
@Override
public void directoryMarkedAuthoritative() {
}
@Override
public void entryAdded(final long durationNanos) {
}
}

View File

@ -125,7 +125,8 @@ public class NullMetadataStore implements MetadataStore {
}
@Override
public void prune(PruneMode pruneMode, long cutoff, String keyPrefix) {
public long prune(PruneMode pruneMode, long cutoff, String keyPrefix) {
return 0;
}
@Override

View File

@ -398,4 +398,16 @@ public final class PathMetadataDynamoDBTranslation {
}
return "s3a://" + parent + "/" + child;
}
/**
* Create an empty dir marker which, when passed to the
* DDB metastore, is considered authoritative.
* @param status file status
* @return path metadata.
*/
static PathMetadata authoritativeEmptyDirectoryMarker(
final S3AFileStatus status) {
return new DDBPathMetadata(status, Tristate.TRUE,
false, true, 0);
}
}

View File

@ -205,7 +205,6 @@ public class ProgressiveRenameTracker extends RenameTracker {
public synchronized void moveSourceDirectory() throws IOException {
// this moves the source directory in the metastore if it has not
// already been processed.
// TODO S3Guard: performance: mark destination dirs as authoritative
if (!pathsToDelete.contains(getSourceRoot())) {
final List<Path> toDelete = new ArrayList<>(1);
final List<PathMetadata> toAdd = new ArrayList<>(1);
@ -216,6 +215,8 @@ public class ProgressiveRenameTracker extends RenameTracker {
getOwner());
getMetadataStore().move(toDelete, toAdd, getOperationState());
}
getMetadataStore().markAsAuthoritative(
getDest(), getOperationState());
}
/**
@ -237,7 +238,8 @@ public class ProgressiveRenameTracker extends RenameTracker {
@Override
public synchronized void completeRename() throws IOException {
// and finish off by deleting source directories.
// mark dest tree as authoritative all the way down.
// finish off by deleting source directories.
sourceObjectsDeleted(pathsToDelete);
super.completeRename();
}

View File

@ -48,14 +48,13 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.Retries.RetryTranslated;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.ReflectionUtils;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_AUTHORITATIVE_PATH;
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.authoritativeEmptyDirectoryMarker;
/**
* Logic for integrating MetadataStore with S3A.
@ -149,7 +148,6 @@ public final class S3Guard {
* returns the same S3AFileStatus. Instrumentation monitors the put operation.
* @param ms MetadataStore to {@code put()} into.
* @param status status to store
* @param instrumentation instrumentation of the s3a file system
* @param timeProvider Time provider to use when writing entries
* @return The same status as passed in
* @throws IOException if metadata store update failed
@ -157,9 +155,8 @@ public final class S3Guard {
@RetryTranslated
public static S3AFileStatus putAndReturn(MetadataStore ms,
S3AFileStatus status,
S3AInstrumentation instrumentation,
ITtlTimeProvider timeProvider) throws IOException {
return putAndReturn(ms, status, instrumentation, timeProvider, null);
return putAndReturn(ms, status, timeProvider, null);
}
/**
@ -167,7 +164,6 @@ public final class S3Guard {
* returns the same S3AFileStatus. Instrumentation monitors the put operation.
* @param ms MetadataStore to {@code put()} into.
* @param status status to store
* @param instrumentation instrumentation of the s3a file system
* @param timeProvider Time provider to use when writing entries
* @param operationState possibly-null metastore state tracker.
* @return The same status as passed in
@ -177,23 +173,40 @@ public final class S3Guard {
public static S3AFileStatus putAndReturn(
final MetadataStore ms,
final S3AFileStatus status,
final S3AInstrumentation instrumentation,
final ITtlTimeProvider timeProvider,
@Nullable final BulkOperationState operationState) throws IOException {
long startTimeNano = System.nanoTime();
try {
putWithTtl(ms, new PathMetadata(status), timeProvider, operationState);
} finally {
instrumentation.addValueToQuantiles(
S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
(System.nanoTime() - startTimeNano));
instrumentation.incrementCounter(
S3GUARD_METADATASTORE_PUT_PATH_REQUEST,
1);
ms.getInstrumentation().entryAdded((System.nanoTime() - startTimeNano));
}
return status;
}
/**
* Creates an authoritative directory marker for the store.
* @param ms MetadataStore to {@code put()} into.
* @param status status to store
* @param timeProvider Time provider to use when writing entries
* @param operationState possibly-null metastore state tracker.
* @throws IOException if metadata store update failed
*/
@RetryTranslated
public static void putAuthDirectoryMarker(
final MetadataStore ms,
final S3AFileStatus status,
final ITtlTimeProvider timeProvider,
@Nullable final BulkOperationState operationState) throws IOException {
long startTimeNano = System.nanoTime();
try {
final PathMetadata fileMeta = authoritativeEmptyDirectoryMarker(status);
putWithTtl(ms, fileMeta, timeProvider, operationState);
} finally {
ms.getInstrumentation().entryAdded((System.nanoTime() - startTimeNano));
}
}
/**
* Initiate a bulk write and create an operation state for it.
* This may then be passed into put operations.
@ -291,7 +304,9 @@ public final class S3Guard {
.collect(Collectors.toMap(
pm -> pm.getFileStatus().getPath(), PathMetadata::getFileStatus)
);
BulkOperationState operationState = ms.initiateBulkWrite(
BulkOperationState.OperationType.Listing,
path);
for (S3AFileStatus s : backingStatuses) {
if (deleted.contains(s.getPath())) {
continue;
@ -304,7 +319,7 @@ public final class S3Guard {
if (status != null
&& s.getModificationTime() > status.getModificationTime()) {
LOG.debug("Update ms with newer metadata of: {}", status);
S3Guard.putWithTtl(ms, pathMetadata, timeProvider, null);
S3Guard.putWithTtl(ms, pathMetadata, timeProvider, operationState);
}
}
@ -324,9 +339,16 @@ public final class S3Guard {
changed = changed || (!dirMeta.isAuthoritative() && isAuthoritative);
if (changed && isAuthoritative) {
LOG.debug("Marking the directory {} as authoritative", path);
final MetastoreInstrumentation instrumentation
= ms.getInstrumentation();
if (instrumentation != null) {
instrumentation.directoryMarkedAuthoritative();
}
dirMeta.setAuthoritative(true); // This is the full directory contents
S3Guard.putWithTtl(ms, dirMeta, timeProvider, null);
S3Guard.putWithTtl(ms, dirMeta, timeProvider, operationState);
}
IOUtils.cleanupWithLogger(LOG, operationState);
return dirMetaToStatuses(dirMeta);
}

View File

@ -18,13 +18,14 @@
package org.apache.hadoop.fs.s3a.s3guard;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.AccessDeniedException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -46,19 +47,21 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.MultipartUtils;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.auth.RolePolicies;
import org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens;
import org.apache.hadoop.fs.s3a.commit.CommitConstants;
import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants;
import org.apache.hadoop.fs.s3a.select.SelectTool;
import org.apache.hadoop.fs.shell.CommandFormat;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ExitCodeProvider;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
@ -68,12 +71,15 @@ import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.Invoker.LOG_EVENT;
import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*;
import static org.apache.hadoop.fs.s3a.commit.staging.StagingCommitterConstants.FILESYSTEM_TEMP_PATH;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.*;
/**
* CLI to manage S3Guard Metadata Store.
*/
public abstract class S3GuardTool extends Configured implements Tool {
public abstract class S3GuardTool extends Configured implements Tool,
Closeable {
private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
private static final String NAME = "s3guard";
@ -97,7 +103,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
"\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n" +
"\t" + SetCapacity.NAME + " - " + SetCapacity.PURPOSE + "\n" +
"\t" + SelectTool.NAME + " - " + SelectTool.PURPOSE + "\n" +
"\t" + Fsck.NAME + " - " + Fsck.PURPOSE + "\n";
"\t" + Fsck.NAME + " - " + Fsck.PURPOSE + "\n" +
"\t" + Authoritative.NAME + " - " + Authoritative.PURPOSE + "\n";
private static final String DATA_IN_S3_IS_PRESERVED
= "(all data in S3 is preserved)";
@ -111,6 +118,14 @@ public abstract class S3GuardTool extends Configured implements Tool {
static final int E_BAD_STATE = EXIT_NOT_ACCEPTABLE;
static final int E_NOT_FOUND = EXIT_NOT_FOUND;
/** Error String when the wrong FS is used for binding: {@value}. **/
@VisibleForTesting
public static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
/**
* The FS we close when we are closed.
*/
private FileSystem baseFS;
private S3AFileSystem filesystem;
private MetadataStore store;
private final CommandFormat commandFormat;
@ -130,6 +145,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
public static final String WRITE_FLAG = "write";
public static final String TAG_FLAG = "tag";
public static final String VERBOSE = "verbose";
/**
* Constructor a S3Guard tool with HDFS configuration.
* @param conf Configuration.
@ -152,10 +169,23 @@ public abstract class S3GuardTool extends Configured implements Tool {
/**
* Return sub-command name.
* @return sub-dommand name.
* @return sub-command name.
*/
public abstract String getName();
/**
* Close the FS and metastore.
* @throws IOException on failure.
*/
@Override
public void close() throws IOException {
IOUtils.cleanupWithLogger(LOG,
baseFS, store);
baseFS = null;
filesystem = null;
store = null;
}
/**
* Parse DynamoDB region from either -m option or a S3 path.
*
@ -365,12 +395,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
"Expected bucket option to be %s but was %s",
S3GUARD_METASTORE_NULL, updatedBucketOption);
FileSystem fs = FileSystem.newInstance(uri, conf);
if (!(fs instanceof S3AFileSystem)) {
throw invalidArgs("URI %s is not a S3A file system: %s",
uri, fs.getClass().getName());
}
filesystem = (S3AFileSystem) fs;
bindFilesystem(FileSystem.newInstance(uri, conf));
}
/**
@ -410,8 +435,26 @@ public abstract class S3GuardTool extends Configured implements Tool {
return filesystem;
}
protected void setFilesystem(S3AFileSystem filesystem) {
this.filesystem = filesystem;
/**
* Sets the filesystem; it must be an S3A FS instance, or a FilterFS
* around an S3A Filesystem.
* @param bindingFS filesystem to bind to
* @return the bound FS.
* @throws ExitUtil.ExitException if the FS is not an S3 FS
*/
protected S3AFileSystem bindFilesystem(FileSystem bindingFS) {
FileSystem fs = bindingFS;
baseFS = bindingFS;
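// peel off any FilterFileSystem wrappers to reach the innermost filesystem.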
while (fs instanceof FilterFileSystem) {
fs = ((FilterFileSystem) fs).getRawFileSystem();
}
if (!(fs instanceof S3AFileSystem)) {
throw new ExitUtil.ExitException(EXIT_SERVICE_UNAVAILABLE,
WRONG_FILESYSTEM + "URI " + fs.getUri() + " : "
+ fs.getClass().getName());
}
filesystem = (S3AFileSystem) fs;
return filesystem;
}
@VisibleForTesting
@ -714,9 +757,12 @@ public abstract class S3GuardTool extends Configured implements Tool {
public static final String NAME = "import";
public static final String PURPOSE = "import metadata from existing S3 " +
"data";
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
public static final String AUTH_FLAG = "authoritative";
private static final String USAGE = NAME + " [OPTIONS] [s3a://PATH]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + AUTH_FLAG + " - Mark imported directory data as authoritative.\n" +
" -" + VERBOSE + " - Verbose Output.\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
@ -727,10 +773,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
private final Set<Path> dirCache = new HashSet<>();
Import(Configuration conf) {
super(conf);
super(conf, AUTH_FLAG, VERBOSE);
}
@Override
@ -743,65 +787,6 @@ public abstract class S3GuardTool extends Configured implements Tool {
return USAGE;
}
/**
* Put parents into MS and cache if the parents are not presented.
*
* @param f the file or an empty directory.
* @param operationState store's bulk update state.
* @throws IOException on I/O errors.
*/
private void putParentsIfNotPresent(FileStatus f,
@Nullable BulkOperationState operationState) throws IOException {
Preconditions.checkNotNull(f);
Path parent = f.getPath().getParent();
while (parent != null) {
if (dirCache.contains(parent)) {
return;
}
S3AFileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
f.getOwner());
S3Guard.putWithTtl(getStore(), new PathMetadata(dir),
getFilesystem().getTtlTimeProvider(),
operationState);
dirCache.add(parent);
parent = parent.getParent();
}
}
/**
* Recursively import every path under path.
* @return number of items inserted into MetadataStore
* @throws IOException on I/O errors.
*/
private long importDir(FileStatus status) throws IOException {
Preconditions.checkArgument(status.isDirectory());
BulkOperationState operationState = getStore().initiateBulkWrite(
BulkOperationState.OperationType.Put,
status.getPath());
RemoteIterator<S3ALocatedFileStatus> it = getFilesystem()
.listFilesAndEmptyDirectories(status.getPath(), true);
long items = 0;
while (it.hasNext()) {
S3ALocatedFileStatus located = it.next();
S3AFileStatus child;
if (located.isDirectory()) {
child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
located.getOwner());
dirCache.add(child.getPath());
} else {
child = located.toS3AFileStatus();
}
putParentsIfNotPresent(child, operationState);
S3Guard.putWithTtl(getStore(),
new PathMetadata(child),
getFilesystem().getTtlTimeProvider(),
operationState);
items++;
}
return items;
}
@Override
public int run(String[] args, PrintStream out) throws Exception {
List<String> paths = parseArgs(args);
@ -829,14 +814,15 @@ public abstract class S3GuardTool extends Configured implements Tool {
throw storeNotFound(e);
}
long items = 1;
if (status.isFile()) {
PathMetadata meta = new PathMetadata(status);
getStore().put(meta, null);
} else {
items = importDir(status);
}
final CommandFormat commandFormat = getCommandFormat();
final ImportOperation importer = new ImportOperation(
getFilesystem(),
getStore(),
status,
commandFormat.getOpt(AUTH_FLAG),
commandFormat.getOpt(VERBOSE));
long items = importer.execute();
println(out, "Inserted %d items into Metadata Store", items);
return SUCCESS;
@ -1220,9 +1206,8 @@ public abstract class S3GuardTool extends Configured implements Tool {
unguardedConf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
}
S3AFileSystem fs = (S3AFileSystem) FileSystem.newInstance(
fsURI, unguardedConf);
setFilesystem(fs);
S3AFileSystem fs = bindFilesystem(
FileSystem.newInstance(fsURI, unguardedConf));
Configuration conf = fs.getConf();
URI fsUri = fs.getUri();
MetadataStore store = fs.getMetadataStore();
@ -1245,16 +1230,24 @@ public abstract class S3GuardTool extends Configured implements Tool {
METADATASTORE_AUTHORITATIVE, "false");
printOption(out, "Authoritative Path",
AUTHORITATIVE_PATH, "");
final Collection<String> authoritativePaths
= S3Guard.getAuthoritativePaths(fs);
if (!authoritativePaths.isEmpty()) {
println(out, "Qualified Authoritative Paths:");
for (String path : authoritativePaths) {
println(out, "\t%s", path);
}
println(out, "");
}
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
println(out, "\tMetadata time to live: %s=%s milliseconds",
METADATASTORE_METADATA_TTL, ttl);
printStoreDiagnostics(out, store);
} else {
println(out, "Filesystem %s is not using S3Guard", fsUri);
}
boolean magic = fs.hasPathCapability(
new Path(s3Path),
CommitConstants.STORE_CAPABILITY_MAGIC_COMMITTER);
println(out, "The \"magic\" committer %s supported",
magic ? "is" : "is not");
println(out, "%nS3A Client");
printOption(out, "\tSigning Algorithm", SIGNING_ALGORITHM, "(unset)");
@ -1270,22 +1263,69 @@ public abstract class S3GuardTool extends Configured implements Tool {
CHANGE_DETECT_SOURCE_DEFAULT);
printOption(out, "\tChange Detection Mode", CHANGE_DETECT_MODE,
CHANGE_DETECT_MODE_DEFAULT);
// committers
println(out, "%nS3A Committers");
boolean magic = fs.hasPathCapability(
new Path(s3Path),
CommitConstants.STORE_CAPABILITY_MAGIC_COMMITTER);
println(out, "\tThe \"magic\" committer %s supported in the filesystem",
magic ? "is" : "is not");
printOption(out, "\tS3A Committer factory class",
S3A_COMMITTER_FACTORY_KEY, "");
String committer = conf.getTrimmed(FS_S3A_COMMITTER_NAME,
COMMITTER_NAME_FILE);
printOption(out, "\tS3A Committer name",
FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE);
switch (committer) {
case COMMITTER_NAME_FILE:
println(out, "The original 'file' commmitter is active"
+ " -this is slow and potentially unsafe");
break;
case InternalCommitterConstants.COMMITTER_NAME_STAGING:
println(out, "The 'staging' committer is used "
+ "-prefer the 'directory' committer");
// fall through
case COMMITTER_NAME_DIRECTORY:
// fall through
case COMMITTER_NAME_PARTITIONED:
// print all the staging options.
printOption(out, "\tCluster filesystem staging directory",
FS_S3A_COMMITTER_STAGING_TMP_PATH, FILESYSTEM_TEMP_PATH);
printOption(out, "\tLocal filesystem buffer directory",
BUFFER_DIR, "");
printOption(out, "\tFile conflict resolution",
FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, DEFAULT_CONFLICT_MODE);
break;
case COMMITTER_NAME_MAGIC:
printOption(out, "\tStore magic committer integration",
MAGIC_COMMITTER_ENABLED,
Boolean.toString(DEFAULT_MAGIC_COMMITTER_ENABLED));
if (!magic) {
println(out, "Warning: although the magic committer is enabled, "
+ "the store does not support it");
}
break;
default:
println(out, "\tWarning: committer '%s' is unknown", committer);
}
// look at delegation token support
println(out, "%nSecurity");
if (fs.getDelegationTokens().isPresent()) {
// DT is enabled
S3ADelegationTokens dtIntegration = fs.getDelegationTokens().get();
println(out, "Delegation Support enabled: token kind = %s",
println(out, "\tDelegation Support enabled: token kind = %s",
dtIntegration.getTokenKind());
UserGroupInformation.AuthenticationMethod authenticationMethod
= UserGroupInformation.getCurrentUser().getAuthenticationMethod();
println(out, "Hadoop security mode: %s", authenticationMethod);
println(out, "\tHadoop security mode: %s", authenticationMethod);
if (UserGroupInformation.isSecurityEnabled()) {
println(out,
"Warning: security is disabled; tokens will not be collected");
"\tWarning: security is disabled; tokens will not be collected");
}
} else {
println(out, "Delegation token support is disabled");
println(out, "\tDelegation token support is disabled");
}
if (usingS3Guard) {
@ -1339,7 +1379,6 @@ public abstract class S3GuardTool extends Configured implements Tool {
public static final String ABORT = "abort";
public static final String LIST = "list";
public static final String EXPECT = "expect";
public static final String VERBOSE = "verbose";
public static final String FORCE = "force";
public static final String PURPOSE = "list or abort pending " +
@ -1653,6 +1692,96 @@ public abstract class S3GuardTool extends Configured implements Tool {
return exitValue;
}
}
/**
* Audits a DynamoDB S3Guard repository for all the entries being
* 'authoritative'.
* Checks bucket settings if {@link #CHECK_FLAG} is set, then
* treewalk.
*/
static class Authoritative extends S3GuardTool {
public static final String NAME = "authoritative";
public static final String CHECK_FLAG = "check-config";
public static final String REQUIRE_AUTH = "required";
public static final String PURPOSE = "Audits a DynamoDB S3Guard "
+ "repository for all the entries being 'authoritative'";
private static final String USAGE = NAME + " [OPTIONS] [s3a://PATH]\n"
+ "\t" + PURPOSE + "\n\n"
+ "Options:\n"
+ " -" + REQUIRE_AUTH + " - Require directories under the path to"
+ " be authoritative.\n"
+ " -" + CHECK_FLAG + " - Check the configuration for the path to"
+ " be authoritative\n"
+ " -" + VERBOSE + " - Verbose Output.\n";
Authoritative(Configuration conf) {
super(conf, CHECK_FLAG, REQUIRE_AUTH, VERBOSE);
}
@Override
public String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
public int run(String[] args, PrintStream out) throws
InterruptedException, IOException {
List<String> paths = parseArgs(args);
if (paths.isEmpty()) {
out.println(USAGE);
throw invalidArgs("no arguments");
}
maybeInitFilesystem(paths);
initMetadataStore(false);
String s3Path = paths.get(0);
URI uri = toUri(s3Path);
Path auditPath;
if (uri.getPath().isEmpty()) {
auditPath = new Path("/");
} else {
auditPath = new Path(uri.getPath());
}
final S3AFileSystem fs = getFilesystem();
final MetadataStore ms = getStore();
if (!(ms instanceof DynamoDBMetadataStore)) {
errorln(s3Path + " path uses MS: " + ms);
errorln(NAME + " can be only used with a DynamoDB-backed S3Guard table.");
errorln(USAGE);
return ERROR;
}
final CommandFormat commandFormat = getCommandFormat();
if (commandFormat.getOpt(CHECK_FLAG)) {
// check that the path is auth
if (!fs.allowAuthoritative(auditPath)) {
// path isn't considered auth in the S3A bucket info
errorln("Path " + auditPath
+ " is not configured to be authoritative");
return AuthoritativeAuditOperation.ERROR_PATH_NOT_AUTH_IN_FS;
}
}
final AuthoritativeAuditOperation audit = new AuthoritativeAuditOperation(
fs.createStoreContext(),
(DynamoDBMetadataStore) ms,
commandFormat.getOpt(REQUIRE_AUTH),
commandFormat.getOpt(VERBOSE));
audit.audit(fs.qualify(auditPath));
out.flush();
return EXIT_SUCCESS;
}
}
private static S3GuardTool command;
@ -1836,12 +1965,19 @@ public abstract class S3GuardTool extends Configured implements Tool {
case Fsck.NAME:
command = new Fsck(conf);
break;
case Authoritative.NAME:
command = new Authoritative(conf);
break;
default:
printHelp();
throw new ExitUtil.ExitException(E_USAGE,
"Unknown command " + subCommand);
}
return ToolRunner.run(conf, command, otherArgs);
try {
return ToolRunner.run(conf, command, otherArgs);
} finally {
IOUtils.cleanupWithLogger(LOG, command);
}
}
/**
@ -1858,6 +1994,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
exit(E_USAGE, e.getMessage());
} catch (ExitUtil.ExitException e) {
// explicitly raised exit code
LOG.debug("Exception raised", e);
exit(e.getExitCode(), e.toString());
} catch (FileNotFoundException e) {
// Bucket doesn't exist or similar - return code of 44, "404".
@ -1865,8 +2002,15 @@ public abstract class S3GuardTool extends Configured implements Tool {
LOG.debug("Not found:", e);
exit(EXIT_NOT_FOUND, e.toString());
} catch (Throwable e) {
e.printStackTrace(System.err);
exit(ERROR, e.toString());
if (e instanceof ExitCodeProvider) {
// this exception provides its own exit code
final ExitCodeProvider ec = (ExitCodeProvider) e;
LOG.debug("Exception raised", e);
exit(ec.getExitCode(), e.toString());
} else {
e.printStackTrace(System.err);
exit(ERROR, e.toString());
}
}
}

View File

@ -40,7 +40,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FutureDataInputStreamBuilder;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.impl.FutureIOSupport;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
import org.apache.hadoop.fs.shell.CommandFormat;
import org.apache.hadoop.util.DurationInfo;
@ -98,8 +97,6 @@ public class SelectTool extends S3GuardTool {
static final String TOO_FEW_ARGUMENTS = "Too few arguments";
static final String WRONG_FILESYSTEM = "Wrong filesystem for ";
static final String SELECT_IS_DISABLED = "S3 Select is disabled";
private OperationDuration selectDuration;
@ -227,14 +224,9 @@ public class SelectTool extends S3GuardTool {
}
// now bind to the filesystem.
FileSystem fs = path.getFileSystem(getConf());
if (!(fs instanceof S3AFileSystem)) {
throw new ExitUtil.ExitException(EXIT_SERVICE_UNAVAILABLE,
WRONG_FILESYSTEM + file + ": got " + fs);
}
setFilesystem((S3AFileSystem) fs);
FileSystem fs = bindFilesystem(path.getFileSystem(getConf()));
if (!getFilesystem().hasPathCapability(path, S3_SELECT_CAPABILITY)) {
if (!fs.hasPathCapability(path, S3_SELECT_CAPABILITY)) {
// capability disabled
throw new ExitUtil.ExitException(EXIT_SERVICE_UNAVAILABLE,
SELECT_IS_DISABLED + " for " + file);

View File

@ -227,7 +227,7 @@ hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata
```
Programmatic retries of the original operation would require overwrite=true.
Suppose the original operation was FileSystem.create(myFile, overwrite=false).
Suppose the original operation was `FileSystem.create(myFile, overwrite=false)`.
If this operation failed with `MetadataPersistenceException` a repeat of the
same operation would result in `FileAlreadyExistsException` since the original
operation successfully created the file in S3 and only failed in writing the
@ -244,7 +244,7 @@ by setting the following configuration:
```
Setting this false is dangerous as it could result in the type of issue S3Guard
is designed to avoid. For example, a reader may see an inconsistent listing
is designed to avoid. For example, a reader may see an inconsistent listing
after a recent write since S3Guard may not contain metadata about the recently
written file due to a metadata write error.
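
A minimal sketch of the retry pattern described earlier (hedged: `myFile` and
`data` are hypothetical placeholders, and production code would add its own
error handling):

```java
import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.MetadataPersistenceException;

/** Sketch only: demonstrates the retry-with-overwrite pattern. */
class MetadataRetryExample {
  static void writeWithRetry(FileSystem fs, Path myFile, byte[] data)
      throws IOException {
    try (FSDataOutputStream out = fs.create(myFile, false)) {
      out.write(data);
    } catch (MetadataPersistenceException e) {
      // The object reached S3; only the S3Guard metadata write failed.
      // Repeating the call with overwrite=false would raise
      // FileAlreadyExistsException, so retry with overwrite=true.
      try (FSDataOutputStream out = fs.create(myFile, true)) {
        out.write(data);
      }
    }
  }
}
```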
@ -622,20 +622,64 @@ if these are both zero then it will be an on-demand table.
### Import a bucket: `s3guard import`
```bash
hadoop s3guard import [-meta URI] s3a://BUCKET
hadoop s3guard import [-meta URI] [-authoritative] [-verbose] s3a://PATH
```
Pre-populates a metadata store according to the current contents of an S3
bucket. If the `-meta` option is omitted, the binding information is taken
bucket/path. If the `-meta` option is omitted, the binding information is taken
from the `core-site.xml` configuration.
Usage
```
hadoop s3guard import
import [OPTIONS] [s3a://PATH]
import metadata from existing S3 data
Common options:
-authoritative - Mark imported directory data as authoritative.
-verbose - Verbose Output.
-meta URL - Metadata repository details (implementation-specific)
Amazon DynamoDB-specific options:
-region REGION - Service region for connections
URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.
Specifying both the -region option and an S3A path
is not supported.
```
Example
Import all files and directories in a bucket into the S3Guard table.
```bash
hadoop s3guard import s3a://ireland-1
```
### Audit a table: `s3guard diff`
Import a directory tree, marking directories as authoritative.
```bash
hadoop s3guard import -authoritative -verbose s3a://ireland-1/fork-0008
2020-01-03 12:05:18,321 [main] INFO - Metadata store DynamoDBMetadataStore{region=eu-west-1,
tableName=s3guard-metadata, tableArn=arn:aws:dynamodb:eu-west-1:980678866538:table/s3guard-metadata} is initialized.
2020-01-03 12:05:18,324 [main] INFO - Starting: Importing s3a://ireland-1/fork-0008
2020-01-03 12:05:18,324 [main] INFO - Importing directory s3a://ireland-1/fork-0008
2020-01-03 12:05:18,537 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-0-false
2020-01-03 12:05:18,630 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-0-true
2020-01-03 12:05:19,142 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-false/dir-0
2020-01-03 12:05:19,191 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-false/dir-1
2020-01-03 12:05:19,240 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-true/dir-0
2020-01-03 12:05:19,289 [main] INFO - Dir s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-true/dir-1
2020-01-03 12:05:19,314 [main] INFO - Updated S3Guard with 0 files and 6 directory entries
2020-01-03 12:05:19,315 [main] INFO - Marking directory tree s3a://ireland-1/fork-0008 as authoritative
2020-01-03 12:05:19,342 [main] INFO - Importing s3a://ireland-1/fork-0008: duration 0:01.018s
Inserted 6 items into Metadata Store
```
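
Marking the tree authoritative in the table only benefits clients which are
also configured to treat that path as authoritative. A sketch of such a
client, assuming the `fs.s3a.authoritative.path` property (the option surfaced
as "Authoritative Path" by `bucket-info`) and illustrative paths:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Sketch only: property name and paths are illustrative assumptions. */
class AuthoritativeClientExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Declare the imported subtree authoritative for this client, so
    // listings under it are served from the S3Guard table without
    // revalidation against S3.
    conf.set("fs.s3a.authoritative.path", "s3a://ireland-1/fork-0008");
    try (FileSystem fs = FileSystem.newInstance(
        new URI("s3a://ireland-1/"), conf)) {
      fs.listStatus(new Path("/fork-0008"));
    }
  }
}
```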
### Compare a S3Guard table and the S3 Store: `s3guard diff`
```bash
hadoop s3guard diff [-meta URI] s3a://BUCKET
@ -856,10 +900,148 @@ the table associated with `s3a://ireland-1` and with the prefix `path_prefix`
hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
```
Delete all entries more than 90 minutes old from the table "`ireland-team"` in
Delete all file entries more than 90 minutes old from the table `ireland-team` in
the region `eu-west-1`.
### Audit the "authoritative state of a DynamoDB Table, `s3guard authoritative`
This recursively checks an S3Guard table to verify that all directories
underneath are marked as "authoritative", and/or that the configuration
is set for the S3A client to treat files and directories under the path
as authoritative.
```
hadoop s3guard authoritative
authoritative [OPTIONS] [s3a://PATH]
Audits a DynamoDB S3Guard repository for all the entries being 'authoritative'
Options:
-required Require directories under the path to be authoritative.
-check-config Check the configuration for the path to be authoritative
-verbose Verbose Output.
```
Verify that a path under an object store is declared to be authoritative
in the cluster configuration, and therefore that file entries will not be
validated against S3, and that directories marked as "authoritative" in the
S3Guard table will be treated as complete.
```bash
hadoop s3guard authoritative -check-config s3a://ireland-1/fork-0003/test/
2020-01-03 11:42:29,147 [main] INFO Metadata store DynamoDBMetadataStore{
region=eu-west-1, tableName=s3guard-metadata, tableArn=arn:aws:dynamodb:eu-west-1:980678866538:table/s3guard-metadata} is initialized.
Path /fork-0003/test is not configured to be authoritative
```
Scan a store and report which directories are not marked as authoritative.
```bash
hadoop s3guard authoritative s3a://ireland-1/
2020-01-03 11:51:58,416 [main] INFO - Metadata store DynamoDBMetadataStore{region=eu-west-1, tableName=s3guard-metadata, tableArn=arn:aws:dynamodb:eu-west-1:980678866538:table/s3guard-metadata} is initialized.
2020-01-03 11:51:58,419 [main] INFO - Starting: audit s3a://ireland-1/
2020-01-03 11:51:58,422 [main] INFO - Root directory s3a://ireland-1/
2020-01-03 11:51:58,469 [main] INFO - files 4; directories 12
2020-01-03 11:51:58,469 [main] INFO - Directory s3a://ireland-1/Users
2020-01-03 11:51:58,521 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,522 [main] INFO - Directory s3a://ireland-1/fork-0007
2020-01-03 11:51:58,573 [main] INFO - Directory s3a://ireland-1/fork-0001
2020-01-03 11:51:58,626 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,626 [main] INFO - Directory s3a://ireland-1/fork-0006
2020-01-03 11:51:58,676 [main] INFO - Directory s3a://ireland-1/path
2020-01-03 11:51:58,734 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,735 [main] INFO - Directory s3a://ireland-1/fork-0008
2020-01-03 11:51:58,802 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,802 [main] INFO - Directory s3a://ireland-1/fork-0004
2020-01-03 11:51:58,854 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,855 [main] WARN - Directory s3a://ireland-1/fork-0003 is not authoritative
2020-01-03 11:51:58,905 [main] INFO - files 0; directories 1
2020-01-03 11:51:58,906 [main] INFO - Directory s3a://ireland-1/fork-0005
2020-01-03 11:51:58,955 [main] INFO - Directory s3a://ireland-1/customsignerpath2
2020-01-03 11:51:59,006 [main] INFO - Directory s3a://ireland-1/fork-0002
2020-01-03 11:51:59,063 [main] INFO - files 0; directories 1
2020-01-03 11:51:59,064 [main] INFO - Directory s3a://ireland-1/customsignerpath1
2020-01-03 11:51:59,121 [main] INFO - Directory s3a://ireland-1/Users/stevel
2020-01-03 11:51:59,170 [main] INFO - files 0; directories 1
2020-01-03 11:51:59,171 [main] INFO - Directory s3a://ireland-1/fork-0001/test
2020-01-03 11:51:59,233 [main] INFO - Directory s3a://ireland-1/path/style
2020-01-03 11:51:59,282 [main] INFO - files 0; directories 1
2020-01-03 11:51:59,282 [main] INFO - Directory s3a://ireland-1/fork-0008/test
2020-01-03 11:51:59,338 [main] INFO - files 15; directories 10
2020-01-03 11:51:59,339 [main] INFO - Directory s3a://ireland-1/fork-0004/test
2020-01-03 11:51:59,394 [main] WARN - Directory s3a://ireland-1/fork-0003/test is not authoritative
2020-01-03 11:51:59,451 [main] INFO - files 35; directories 1
2020-01-03 11:51:59,451 [main] INFO - Directory s3a://ireland-1/fork-0002/test
2020-01-03 11:51:59,508 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects
2020-01-03 11:51:59,558 [main] INFO - files 0; directories 1
2020-01-03 11:51:59,559 [main] INFO - Directory s3a://ireland-1/path/style/access
2020-01-03 11:51:59,610 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-2-0-false
2020-01-03 11:51:59,660 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-2-1-false
2020-01-03 11:51:59,719 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-0-true
2020-01-03 11:51:59,773 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-true
2020-01-03 11:51:59,824 [main] INFO - files 0; directories 2
2020-01-03 11:51:59,824 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-2-1-true
2020-01-03 11:51:59,879 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-1-false
2020-01-03 11:51:59,939 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-0-false
2020-01-03 11:51:59,990 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-2-0-true
2020-01-03 11:52:00,042 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-false
2020-01-03 11:52:00,094 [main] INFO - files 0; directories 2
2020-01-03 11:52:00,094 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-0-0-1-true
2020-01-03 11:52:00,144 [main] WARN - Directory s3a://ireland-1/fork-0003/test/ancestor is not authoritative
2020-01-03 11:52:00,197 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk
2020-01-03 11:52:00,245 [main] INFO - files 0; directories 1
2020-01-03 11:52:00,245 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-true/dir-0
2020-01-03 11:52:00,296 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-true/dir-1
2020-01-03 11:52:00,346 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-false/dir-0
2020-01-03 11:52:00,397 [main] INFO - Directory s3a://ireland-1/fork-0008/test/doTestListFiles-2-0-0-false/dir-1
2020-01-03 11:52:00,479 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools
2020-01-03 11:52:00,530 [main] INFO - files 0; directories 1
2020-01-03 11:52:00,530 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws
2020-01-03 11:52:00,582 [main] INFO - files 0; directories 1
2020-01-03 11:52:00,582 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target
2020-01-03 11:52:00,636 [main] INFO - files 0; directories 1
2020-01-03 11:52:00,637 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir
2020-01-03 11:52:00,691 [main] INFO - files 0; directories 3
2020-01-03 11:52:00,691 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir/2
2020-01-03 11:52:00,752 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir/5
2020-01-03 11:52:00,807 [main] INFO - Directory s3a://ireland-1/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir/8
2020-01-03 11:52:00,862 [main] INFO - Scanned 45 directories - 3 were not marked as authoritative
2020-01-03 11:52:00,863 [main] INFO - audit s3a://ireland-1/: duration 0:02.444s
```
Scan the path/bucket and fail if any entry is non-authoritative.
```bash
hadoop s3guard authoritative -verbose -required s3a://ireland-1/
2020-01-03 11:47:40,288 [main] INFO - Metadata store DynamoDBMetadataStore{region=eu-west-1, tableName=s3guard-metadata, tableArn=arn:aws:dynamodb:eu-west-1:980678866538:table/s3guard-metadata} is initialized.
2020-01-03 11:47:40,291 [main] INFO - Starting: audit s3a://ireland-1/
2020-01-03 11:47:40,295 [main] INFO - Root directory s3a://ireland-1/
2020-01-03 11:47:40,336 [main] INFO - files 4; directories 12
2020-01-03 11:47:40,336 [main] INFO - Directory s3a://ireland-1/Users
2020-01-03 11:47:40,386 [main] INFO - files 0; directories 1
2020-01-03 11:47:40,386 [main] INFO - Directory s3a://ireland-1/fork-0007
2020-01-03 11:47:40,435 [main] INFO - files 1; directories 0
2020-01-03 11:47:40,435 [main] INFO - Directory s3a://ireland-1/fork-0001
2020-01-03 11:47:40,486 [main] INFO - files 0; directories 1
2020-01-03 11:47:40,486 [main] INFO - Directory s3a://ireland-1/fork-0006
2020-01-03 11:47:40,534 [main] INFO - files 1; directories 0
2020-01-03 11:47:40,535 [main] INFO - Directory s3a://ireland-1/path
2020-01-03 11:47:40,587 [main] INFO - files 0; directories 1
2020-01-03 11:47:40,588 [main] INFO - Directory s3a://ireland-1/fork-0008
2020-01-03 11:47:40,641 [main] INFO - files 0; directories 1
2020-01-03 11:47:40,642 [main] INFO - Directory s3a://ireland-1/fork-0004
2020-01-03 11:47:40,692 [main] INFO - files 0; directories 1
2020-01-03 11:47:40,693 [main] WARN - Directory s3a://ireland-1/fork-0003 is not authoritative
2020-01-03 11:47:40,693 [main] INFO - audit s3a://ireland-1/: duration 0:00.402s
2020-01-03 11:47:40,698 [main] INFO - Exiting with status 46: `s3a://ireland-1/fork-0003': Directory is not marked as authoritative in the S3Guard store
```
This command is primarily for testing.
### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`
Alter the read and/or write capacity of a s3guard table created with provisioned

View File

@ -18,7 +18,13 @@
package org.apache.hadoop.fs.contract.s3a;
import java.util.Arrays;
import java.util.Collection;
import org.junit.Assume;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -34,17 +40,38 @@ import org.apache.hadoop.fs.s3a.Statistic;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.verifyFileContents;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.S3A_TEST_TIMEOUT;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering rename.
* Parameterized for auth mode as testRenameWithNonEmptySubDir was failing
* during HADOOP-16697 development; this lets us ensure that when S3Guard
is enabled, both auth and nonauth paths work.
*/
@RunWith(Parameterized.class)
public class ITestS3AContractRename extends AbstractContractRenameTest {
public static final Logger LOG = LoggerFactory.getLogger(
ITestS3AContractRename.class);
private final boolean authoritative;
/**
* Parameterization.
*/
@Parameterized.Parameters(name = "auth={0}")
public static Collection<Object[]> params() {
return Arrays.asList(new Object[][]{
{false},
{true}
});
}
public ITestS3AContractRename(boolean authoritative) {
this.authoritative = authoritative;
}
@Override
protected int getTestTimeoutMillis() {
@ -60,6 +87,7 @@ public class ITestS3AContractRename extends AbstractContractRenameTest {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritative);
return conf;
}
@ -69,9 +97,11 @@ public class ITestS3AContractRename extends AbstractContractRenameTest {
}
@Override
public void teardown() throws Exception {
describe("\nTeardown\n");
super.teardown();
public void setup() throws Exception {
super.setup();
Assume.assumeTrue(
"Skipping auth mode tests when the FS doesn't have a metastore",
!authoritative || ((S3AFileSystem) getFileSystem()).hasMetadataStore());
}
@Override

View File

@ -250,6 +250,7 @@ public class TestPartialDeleteFailures {
public String getBucketLocation() throws IOException {
return null;
}
}
/**
* MetadataStore which tracks what is deleted and added.
@ -346,10 +347,10 @@ public class TestPartialDeleteFailures {
}
@Override
public void prune(final PruneMode pruneMode,
public long prune(final PruneMode pruneMode,
final long cutoff,
final String keyPrefix) {
return 0;
}
@Override

View File

@ -25,6 +25,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@ -81,6 +82,11 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
private MetadataStore ms;
private S3AFileSystem rawFs;
/**
* List of tools to close in test teardown.
*/
private final List<S3GuardTool> toolsToClose = new ArrayList<>();
/**
* The test timeout is increased in case previous tests have created
* many tombstone markers which now need to be purged.
@ -91,6 +97,16 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
return SCALE_TEST_TIMEOUT_SECONDS * 1000;
}
/**
* Declare that the tool is to be closed in teardown.
* @param tool tool to close
* @return the tool.
*/
protected <T extends S3GuardTool> T toClose(T tool) {
toolsToClose.add(tool);
return tool;
}
protected static void expectResult(int expected,
String message,
S3GuardTool tool,
@ -180,6 +196,7 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
@Override
public void teardown() throws Exception {
super.teardown();
toolsToClose.forEach(t -> IOUtils.cleanupWithLogger(LOG, t));
IOUtils.cleanupWithLogger(LOG, ms);
IOUtils.closeStream(rawFs);
}
@ -264,9 +281,9 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
Path keepParent = path("prune-cli-keep");
StopWatch timer = new StopWatch();
final S3AFileSystem fs = getFileSystem();
S3GuardTool.Prune cmd = toClose(new S3GuardTool.Prune(cmdConf));
cmd.setMetadataStore(ms);
try {
S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
cmd.setMetadataStore(ms);
fs.mkdirs(parent);
fs.mkdirs(keepParent);
@ -299,6 +316,8 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
ms.prune(MetadataStore.PruneMode.ALL_BY_MODTIME,
Long.MAX_VALUE,
fs.pathToKey(keepParent));
// reset the store before we close the tool.
cmd.setMetadataStore(new NullMetadataStore());
}
}
@ -323,14 +342,20 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
Path testPath = path("testPruneCommandTombstones");
getFileSystem().mkdirs(testPath);
getFileSystem().delete(testPath, true);
S3GuardTool.Prune cmd = new S3GuardTool.Prune(getFileSystem().getConf());
S3GuardTool.Prune cmd = toClose(
new S3GuardTool.Prune(getFileSystem().getConf()));
cmd.setMetadataStore(ms);
exec(cmd,
"prune", "-" + S3GuardTool.Prune.TOMBSTONE,
"-seconds", "0",
testPath.toString());
assertNotNull("Command did not create a filesystem",
cmd.getFilesystem());
try {
exec(cmd,
"prune", "-" + S3GuardTool.Prune.TOMBSTONE,
"-seconds", "0",
testPath.toString());
assertNotNull("Command did not create a filesystem",
cmd.getFilesystem());
} finally {
// reset the store before we close the tool.
cmd.setMetadataStore(new NullMetadataStore());
}
}
/**
@ -339,10 +364,12 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
@Test
public void testMaybeInitFilesystem() throws Exception {
Path testPath = path("maybeInitFilesystem");
S3GuardTool.Prune cmd = new S3GuardTool.Prune(getFileSystem().getConf());
cmd.maybeInitFilesystem(Collections.singletonList(testPath.toString()));
assertNotNull("Command did not create a filesystem",
cmd.getFilesystem());
try (S3GuardTool.Prune cmd =
new S3GuardTool.Prune(getFileSystem().getConf())) {
cmd.maybeInitFilesystem(Collections.singletonList(testPath.toString()));
assertNotNull("Command did not create a filesystem",
cmd.getFilesystem());
}
}
/**
@ -350,10 +377,12 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
*/
@Test
public void testMaybeInitFilesystemNoPath() throws Exception {
S3GuardTool.Prune cmd = new S3GuardTool.Prune(getFileSystem().getConf());
cmd.maybeInitFilesystem(Collections.emptyList());
assertNull("Command should not have created a filesystem",
cmd.getFilesystem());
try (S3GuardTool.Prune cmd = new S3GuardTool.Prune(
getFileSystem().getConf())) {
cmd.maybeInitFilesystem(Collections.emptyList());
assertNull("Command should not have created a filesystem",
cmd.getFilesystem());
}
}
@Test
@ -379,13 +408,13 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
String bucket = getFileSystem().getBucket();
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, getFileSystem().getBucket());
S3GuardTool.SetCapacity cmdR = new S3GuardTool.SetCapacity(conf);
S3GuardTool.SetCapacity cmdR = toClose(new S3GuardTool.SetCapacity(conf));
String[] argsR =
new String[]{cmdR.getName(), "-read", "0", "s3a://" + bucket};
intercept(IllegalArgumentException.class,
S3GuardTool.SetCapacity.READ_CAP_INVALID, () -> cmdR.run(argsR));
S3GuardTool.SetCapacity cmdW = new S3GuardTool.SetCapacity(conf);
S3GuardTool.SetCapacity cmdW = toClose(new S3GuardTool.SetCapacity(conf));
String[] argsW =
new String[]{cmdW.getName(), "-write", "0", "s3a://" + bucket};
intercept(IllegalArgumentException.class,
@ -408,7 +437,7 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
// run a bucket info command and look for
// confirmation that it got the output from DDB diags
S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf);
S3GuardTool.BucketInfo infocmd = toClose(new S3GuardTool.BucketInfo(conf));
String info = exec(infocmd, S3GuardTool.BucketInfo.NAME,
"-" + S3GuardTool.BucketInfo.UNGUARDED_FLAG,
fsUri.toString());
@ -427,7 +456,7 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
clearBucketOption(conf, bucket, S3GUARD_DDB_TABLE_CREATE_KEY);
conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
S3GuardTool.SetCapacity cmdR = new S3GuardTool.SetCapacity(conf);
S3GuardTool.SetCapacity cmdR = toClose(new S3GuardTool.SetCapacity(conf));
String[] argsR = new String[]{
cmdR.getName(),
"s3a://" + getFileSystem().getBucket()
@ -467,7 +496,8 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
Arrays.asList(S3GuardTool.Destroy.class, S3GuardTool.BucketInfo.class,
S3GuardTool.Diff.class, S3GuardTool.Import.class,
S3GuardTool.Prune.class, S3GuardTool.SetCapacity.class,
S3GuardTool.Uploads.class);
S3GuardTool.Uploads.class,
S3GuardTool.Authoritative.class);
for (Class<? extends S3GuardTool> tool : tools) {
S3GuardTool cmdR = makeBindedTool(tool);
@ -500,7 +530,8 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
List<Class<? extends S3GuardTool>> tools =
Arrays.asList(S3GuardTool.BucketInfo.class, S3GuardTool.Diff.class,
S3GuardTool.Import.class, S3GuardTool.Prune.class,
S3GuardTool.SetCapacity.class, S3GuardTool.Uploads.class);
S3GuardTool.SetCapacity.class, S3GuardTool.Uploads.class,
S3GuardTool.Authoritative.class);
for (Class<? extends S3GuardTool> tool : tools) {
S3GuardTool cmdR = makeBindedTool(tool);
@ -589,7 +620,7 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
}
ByteArrayOutputStream buf = new ByteArrayOutputStream();
S3GuardTool.Diff cmd = new S3GuardTool.Diff(fs.getConf());
S3GuardTool.Diff cmd = toClose(new S3GuardTool.Diff(fs.getConf()));
cmd.setStore(ms);
String table = "dynamo://" + getTestTableName(DYNAMODB_TABLE);
exec(0, "", cmd, buf, "diff", "-meta", table, testPath.toString());

View File

@ -1275,6 +1275,84 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
assertNotFound(dir);
}
@Test
public void testPruneFilesNotDirs() throws Throwable {
describe("HADOOP-16725: directories cannot be pruned");
String base = "/" + getMethodName();
final long now = getTime();
// round it off for ease of interpreting results
final long t0 = now - (now % 100_000);
long interval = 1_000;
long t1 = t0 + interval;
long t2 = t1 + interval;
String dir = base + "/dir";
String dir2 = base + "/dir2";
String child1 = dir + "/file1";
String child2 = dir + "/file2";
final Path basePath = strToPath(base);
// put the dir at age t0
final DynamoDBMetadataStore ms = getDynamoMetadataStore();
final AncestorState ancestorState
= ms.initiateBulkWrite(
BulkOperationState.OperationType.Put,
basePath);
putDir(base, t0, ancestorState);
assertLastUpdated(base, t0);
putDir(dir, t0, ancestorState);
assertLastUpdated(dir, t0);
// base dir is unchanged
assertLastUpdated(base, t0);
// this directory will not have any children, so
// will be excluded from any ancestor re-creation
putDir(dir2, t0, ancestorState);
// child1 has age t0 and so will be pruned
putFile(child1, t0, ancestorState);
// child2 has age t2
putFile(child2, t2, ancestorState);
// close the ancestor state
ancestorState.close();
// make some assertions about state before the prune
assertLastUpdated(base, t0);
assertLastUpdated(dir, t0);
assertLastUpdated(dir2, t0);
assertLastUpdated(child1, t0);
assertLastUpdated(child2, t2);
// pruning all entries older than t1 must delete child1 but
// not the directory, even though it is of the same age
LOG.info("Starting prune of all entries older than {}", t1);
ms.prune(PruneMode.ALL_BY_MODTIME, t1);
// child1 is gone
assertNotFound(child1);
// *AND* the parent dir has not been created
assertCached(dir);
assertCached(child2);
assertCached(dir2);
}
/**
* Assert that there is an entry for the given key and that its
* last updated timestamp matches that passed in.
* @param key Key to look up.
* @param lastUpdated Timestamp to expect.
* @throws IOException I/O failure.
*/
protected void assertLastUpdated(final String key, final long lastUpdated)
throws IOException {
PathMetadata dirMD = verifyCached(key);
assertEquals("Last updated timestamp in MD " + dirMD,
lastUpdated, dirMD.getLastUpdated());
}
/**
* Keep in sync with code changes in S3AFileSystem.finishedWrite() so that
* the production code can be tested here.

View File

@ -0,0 +1,732 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import org.assertj.core.api.Assertions;
import org.junit.AfterClass;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.io.IOUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.rm;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH;
import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY;
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles;
import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_LIST_REQUESTS;
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED;
import static org.apache.hadoop.fs.s3a.s3guard.AuthoritativeAuditOperation.ERROR_PATH_NOT_AUTH_IN_FS;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.authoritativeEmptyDirectoryMarker;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Authoritative.CHECK_FLAG;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Authoritative.REQUIRE_AUTH;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Import.AUTH_FLAG;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.VERBOSE;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardToolTestHelper.exec;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardToolTestHelper.expectExecResult;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_NOT_ACCEPTABLE;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_NOT_FOUND;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Test to verify the expected behaviour of DynamoDB and authoritative mode.
* The main testFS is non-auth; we also create a test FS which runs in auth
* mode.
* Making the default FS non-auth means that test path cleanup in the
superclass isn't going to get misled by anything authoritative.
*
For a performance boost we demand-create the auth FS and its test
* paths on the first test setup().
* This also fixes the auth/nonauth paths so that a specific
* bit of the FS is expected to be auth in the FS.
*
* This test is designed to run in parallel mode with other tests which
* may or may not be auth mode.
*
It shouldn't make any difference: tests here simply must not make
* any assumptions about the state of any path outside the test tree.
*/
@SuppressWarnings("StaticNonFinalField")
public class ITestDynamoDBMetadataStoreAuthoritativeMode
extends AbstractS3ATestBase {
private static final Logger LOG = LoggerFactory.getLogger(
ITestDynamoDBMetadataStoreAuthoritativeMode.class);
public static final String AUDIT = S3GuardTool.Authoritative.NAME;
public static final String IMPORT = S3GuardTool.Import.NAME;
private String fsUriStr;
/**
* Authoritative FS.
*/
private static S3AFileSystem authFS;
/**
* The unguarded file system.
*/
private static S3AFileSystem unguardedFS;
/**
* Base path in the store for auth and nonauth paths.
*/
private static Path basePath;
/**
* Path under basePath which will be declared as authoritative.
*/
private static Path authPath;
/**
* Path under basePath which will be declared as non-authoritative.
*/
private static Path nonauthPath;
/**
* test method specific auth path.
*/
private Path methodAuthPath;
/**
* test method specific non-auth path.
*/
private Path methodNonauthPath;
/**
* Auditor of store state.
*/
private AuthoritativeAuditOperation auditor;
private Path dir;
private Path dirFile;
/**
* List of tools to close in test teardown.
*/
private final List<S3GuardTool> toolsToClose = new ArrayList<>();
/**
* After all tests have run, close the filesystems.
*/
@AfterClass
public static void closeFileSystems() {
IOUtils.cleanupWithLogger(LOG, authFS, unguardedFS);
}
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
removeBaseAndBucketOverrides(conf,
S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY,
METADATASTORE_AUTHORITATIVE,
AUTHORITATIVE_PATH);
conf.setTimeDuration(
S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY,
0,
TimeUnit.MILLISECONDS);
return conf;
}
/**
* Test case setup will on-demand create the class-level fields
* of the authFS and the auth/non-auth paths.
*/
@Override
public void setup() throws Exception {
super.setup();
S3AFileSystem fs = getFileSystem();
Configuration conf = fs.getConf();
S3ATestUtils.assumeS3GuardState(true, conf);
assume("Filesystem isn't running DDB",
fs.getMetadataStore() instanceof DynamoDBMetadataStore);
URI fsURI = fs.getUri();
fsUriStr = fsURI.toString();
if (!fsUriStr.endsWith("/")) {
fsUriStr = fsUriStr + "/";
}
if (authFS == null) {
// creating the test stores
basePath = path("base");
authPath = new Path(basePath, "auth");
nonauthPath = new Path(basePath, "nonauth");
final Configuration authconf = new Configuration(conf);
final URI uri = authPath.toUri();
authconf.set(AUTHORITATIVE_PATH, uri.toString());
authFS = (S3AFileSystem) FileSystem.newInstance(uri, authconf);
// and create the unguarded at the same time
final Configuration unguardedConf = new Configuration(conf);
removeBaseAndBucketOverrides(unguardedConf,
S3_METADATA_STORE_IMPL);
unguardedFS = (S3AFileSystem) FileSystem.newInstance(uri, unguardedConf);
}
auditor = new AuthoritativeAuditOperation(
authFS.createStoreContext(),
(DynamoDBMetadataStore) authFS.getMetadataStore(),
true,
true);
cleanupMethodPaths();
dir = new Path(methodAuthPath, "dir");
dirFile = new Path(dir, "file");
}
@Override
public void teardown() throws Exception {
toolsToClose.forEach(t -> IOUtils.cleanupWithLogger(LOG, t));
try {
cleanupMethodPaths();
} catch (IOException ignored) {
}
super.teardown();
}
/**
* Clean up paths left behind by earlier test runs which halted.
* Uses the authFS; no-op if it is null.
* @throws IOException Failure
*/
private void cleanupMethodPaths() throws IOException {
S3AFileSystem fs = authFS;
if (fs != null) {
methodAuthPath = new Path(authPath, getMethodName());
fs.delete(methodAuthPath, true);
methodNonauthPath = new Path(nonauthPath, getMethodName());
fs.delete(methodNonauthPath, true);
}
}
/**
* Declare that the tool is to be closed in teardown.
* @param tool tool to close
* @return the tool.
*/
protected <T extends S3GuardTool> T toClose(T tool) {
toolsToClose.add(tool);
return tool;
}
/**
* Get the conf of the auth FS.
* @return the auth FS config.
*/
private Configuration getAuthConf() {
return authFS.getConf();
}
@Test
public void testEmptyDirMarkerIsAuth() {
final S3AFileStatus st = new S3AFileStatus(true, dir, "root");
final DDBPathMetadata md = (DDBPathMetadata)
authoritativeEmptyDirectoryMarker(st);
Assertions.assertThat(md)
.describedAs("Metadata %s", md)
.matches(DDBPathMetadata::isAuthoritativeDir, "is auth dir")
.matches(d -> d.isEmptyDirectory() == Tristate.TRUE,
"isEmptyDirectory");
}
@Test
@Ignore("HADOOP-16697. Needs mkdir to be authoritative")
public void testMkDirAuth() throws Throwable {
describe("create an empty dir and assert it is tagged as authoritative");
authFS.mkdirs(dir);
expectAuthRecursive(dir);
}
@Test
public void testListStatusMakesEmptyDirAuth() throws Throwable {
describe("Verify listStatus marks an Empty dir as auth");
authFS.mkdirs(dir);
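// mkdirs alone does not (yet) mark the new dir as authoritative; see HADOOP-16697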
expectNonauthRecursive(dir);
authFS.listStatus(dir);
// the dir is now auth
expectAuthRecursive(dir);
// Next list will not go to s3
assertListDoesNotUpdateAuth(dir);
}
@Test
public void testListStatusMakesDirAuth() throws Throwable {
describe("Verify listStatus marks a dir as auth");
final Path subdir = new Path(dir, "subdir");
mkAuthDir(dir);
expectAuthRecursive(dir);
authFS.mkdirs(subdir);
// dir is auth; subdir is not
expectAuthNonRecursive(dir);
expectNonauthRecursive(dir);
assertListDoesNotUpdateAuth(dir);
// Subdir list makes it auth
assertListUpdatesAuth(subdir);
}
@Test
public void testAddFileMarksNonAuth() throws Throwable {
describe("Adding a file marks dir as nonauth but leaves ancestors alone");
mkAuthDir(methodAuthPath);
touchFile(dirFile);
expectNonauthRecursive(dir);
assertListUpdatesAuth(dir);
expectAuthRecursive(methodAuthPath);
}
/**
* When the only file in a directory is deleted, a fake directory
* marker is added. This marker must be authoritative.
*/
@Test
@Ignore("HADOOP-16697. Needs mkdir to be authoritative")
public void testDeleteSingleFileLeavesMarkersAlone() throws Throwable {
describe("Deleting a file with no peers makes no changes to ancestors");
mkAuthDir(methodAuthPath);
touchFile(dirFile);
assertListUpdatesAuth(dir);
authFS.delete(dirFile, false);
expectAuthRecursive(methodAuthPath);
}
@Test
public void testDeleteMultipleFileLeavesMarkersAlone() throws Throwable {
describe("Deleting a file from a dir with >1 file makes no changes"
+ " to ancestors");
mkAuthDir(methodAuthPath);
touchFile(dirFile);
Path file2 = new Path(dir, "file2");
touchFile(file2);
assertListUpdatesAuth(dir);
authFS.delete(dirFile, false);
expectAuthRecursive(methodAuthPath);
}
/**
* Assert the number of pruned files matches expectations.
* @param path path to prune
* @param mode prune mode
* @param limit timestamp before which files are deleted
* @param expected number of entries to be pruned
*/
protected void assertPruned(final Path path,
final MetadataStore.PruneMode mode,
final long limit,
final int expected)
throws IOException {
String keyPrefix
= PathMetadataDynamoDBTranslation.pathToParentKey(path);
Assertions.assertThat(
authFS.getMetadataStore().prune(
mode,
limit,
keyPrefix))
.describedAs("Number of files pruned under %s", keyPrefix)
.isEqualTo(expected);
}
@Test
public void testPruneFilesMarksNonAuth() throws Throwable {
describe("Pruning a file marks dir as nonauth");
mkAuthDir(methodAuthPath);
touchFile(dirFile);
assertListUpdatesAuth(dir);
assertPruned(dir,
MetadataStore.PruneMode.ALL_BY_MODTIME,
Long.MAX_VALUE,
1);
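// the pruned entry means the stored listing is no longer complete, so the dir loses its auth status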
expectNonauthRecursive(dir);
}
@Test
public void testPruneTombstoneRetainsAuth() throws Throwable {
describe("Verify that deleting and then pruning a file does not change"
+ " the state of the parent.");
mkAuthDir(methodAuthPath);
touchFile(dirFile);
assertListUpdatesAuth(dir);
// add a second file to avoid hitting the mkdir-is-nonauth issue that causes
// testDeleteSingleFileLeavesMarkersAlone() to fail
Path file2 = new Path(dir, "file2");
touchFile(file2);
authFS.delete(dirFile, false);
expectAuthRecursive(dir);
assertPruned(dir, MetadataStore.PruneMode.TOMBSTONES_BY_LASTUPDATED,
Long.MAX_VALUE, 1);
expectAuthRecursive(dir);
}
@Test
public void testRenameFile() throws Throwable {
describe("renaming a file");
final Path source = new Path(dir, "source");
final Path dest = new Path(dir, "dest");
touchFile(source);
assertListUpdatesAuth(dir);
authFS.rename(source, dest);
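// a file rename within the dir swaps one child entry for another, so the listing stays complete and auth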
expectAuthRecursive(dir);
}
@Test
public void testRenameDirMarksDestAsAuth() throws Throwable {
describe("renaming a dir must mark dest tree as auth");
final Path d = methodAuthPath;
final Path source = new Path(d, "source");
final Path dest = new Path(d, "dest");
mkAuthDir(source);
Path f = new Path(source, "subdir/file");
touchFile(f);
authFS.rename(source, dest);
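// the renamed destination tree is marked auth; the parent was never listed, so it stays nonauth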
expectNonauthRecursive(d);
expectAuthRecursive(dest);
}
@Test
public void testRenameWithNonEmptySubDir() throws Throwable {
final Path renameTestDir = methodAuthPath;
final Path srcDir = new Path(renameTestDir, "src1");
final Path srcSubDir = new Path(srcDir, "sub");
final Path finalDir = new Path(renameTestDir, "dest");
FileSystem fs = authFS;
rm(fs, renameTestDir, true, false);
fs.mkdirs(srcDir);
fs.mkdirs(finalDir);
writeTextFile(fs, new Path(srcDir, "source.txt"),
"this is the file in src dir", false);
writeTextFile(fs, new Path(srcSubDir, "subfile.txt"),
"this is the file in src/sub dir", false);
assertPathExists("not created in src dir",
new Path(srcDir, "source.txt"));
assertPathExists("not created in src/sub dir",
new Path(srcSubDir, "subfile.txt"));
boolean rename = fs.rename(srcDir, finalDir);
Assertions.assertThat(rename)
.describedAs("rename(%s, %s)", srcDir, finalDir)
.isTrue();
// POSIX rename behavior
assertPathExists("not renamed into dest dir",
new Path(finalDir, "source.txt"));
assertPathExists("not renamed into dest/sub dir",
new Path(finalDir, "sub/subfile.txt"));
assertPathDoesNotExist("not deleted",
new Path(srcDir, "source.txt"));
}
@Test
@Ignore("TODO: HADOOP-16465")
public void testListLocatedStatusMarksDirAsAuth() throws Throwable {
describe("validate listLocatedStatus()");
final Path subdir = new Path(dir, "subdir");
final Path subdirfile = new Path(subdir, "file");
touchFile(subdirfile);
// Subdir list makes it auth
expectAuthoritativeUpdate(1, 1, () -> {
final RemoteIterator<LocatedFileStatus> st
= authFS.listLocatedStatus(subdir);
applyLocatedFiles(st,
f -> LOG.info("{}", f));
return null;
});
expectAuthNonRecursive(subdir);
}
@Test
public void testS3GuardImportMarksDirAsAuth() throws Throwable {
describe("import with authoritive=true marks directories");
// the base dir is auth
mkAuthDir(methodAuthPath);
int expected = 0;
final Path subdir = new Path(dir, "subdir");
final Path subdirfile = new Path(subdir, "file");
ContractTestUtils.touch(authFS, subdirfile);
expected++;
for (int i = 0; i < 5; i++) {
ContractTestUtils.touch(authFS, new Path(subdir, "file-" + i));
expected++;
}
final Path emptydir = new Path(dir, "emptydir");
unguardedFS.mkdirs(emptydir);
expected++;
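// the empty dir was created through the unguarded FS, so only the import can add it to the metastore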
S3AFileStatus status1 = (S3AFileStatus) authFS.getFileStatus(subdirfile);
final MetadataStore authMS = authFS.getMetadataStore();
final ImportOperation importer = new ImportOperation(unguardedFS,
authMS,
(S3AFileStatus) unguardedFS.getFileStatus(dir),
true, true);
final Long count = importer.execute();
expectAuthRecursive(dir);
// the parent dir shouldn't have changed
expectAuthRecursive(methodAuthPath);
// file entry
S3AFileStatus status2 = (S3AFileStatus) authFS.getFileStatus(subdirfile);
Assertions.assertThat(status2.getETag())
.describedAs("Etag of %s", status2)
.isEqualTo(status1.getETag());
// only picked up on versioned stores.
Assertions.assertThat(status2.getVersionId())
.describedAs("version ID of %s", status2)
.isEqualTo(status1.getVersionId());
// the import finds files and empty dirs
Assertions.assertThat(count)
.describedAs("Count of imports under %s", dir)
.isEqualTo(expected);
}
/**
* Given a flag, add a - prefix.
* @param flag flag to wrap
* @return a flag for use in the CLI
*/
private String f(String flag) {
return "-" + flag;
}
@Test
public void testAuditS3GuardTool() throws Throwable {
describe("Test the s3guard audit CLI");
authFS.mkdirs(methodAuthPath);
final String path = methodAuthPath.toString();
// this is non-auth, so the scan is rejected
expectExecResult(EXIT_NOT_ACCEPTABLE,
authTool(),
AUDIT,
f(CHECK_FLAG),
f(REQUIRE_AUTH),
f(VERBOSE),
path);
// a non-auth audit is fine
exec(authTool(),
AUDIT,
f(VERBOSE),
path);
// non-auth import
exec(importTool(),
IMPORT,
f(VERBOSE),
path);
// which will leave the result unchanged
expectExecResult(EXIT_NOT_ACCEPTABLE,
authTool(),
AUDIT,
f(CHECK_FLAG),
f(REQUIRE_AUTH),
f(VERBOSE),
path);
// auth import
exec(importTool(),
IMPORT,
f(AUTH_FLAG),
f(VERBOSE),
path);
// so now the audit succeeds
exec(authTool(),
AUDIT,
f(REQUIRE_AUTH),
path);
}
/**
* Create an import tool instance with the auth FS Config.
* It will be closed in teardown.
* @return a new instance.
*/
protected S3GuardTool.Import importTool() {
return toClose(new S3GuardTool.Import(getAuthConf()));
}
/**
* Create an auth tool instance with the auth FS Config.
* It will be closed in teardown.
* @return a new instance.
*/
protected S3GuardTool.Authoritative authTool() {
return toClose(new S3GuardTool.Authoritative(getAuthConf()));
}
@Test
public void testAuditS3GuardToolNonauthDir() throws Throwable {
describe("Test the s3guard audit -check-conf against a nonauth path");
mkdirs(methodNonauthPath);
expectExecResult(ERROR_PATH_NOT_AUTH_IN_FS,
authTool(),
AUDIT,
f(CHECK_FLAG),
methodNonauthPath.toString());
}
@Test
public void testImportNonauthDir() throws Throwable {
describe("s3guard import against a nonauth path marks the dirs as auth");
final String path = methodNonauthPath.toString();
mkdirs(methodNonauthPath);
// auth import
exec(importTool(),
IMPORT,
f(AUTH_FLAG),
f(VERBOSE),
path);
exec(authTool(),
AUDIT,
f(REQUIRE_AUTH),
f(VERBOSE),
path);
}
@Test
public void testAuditS3GuardToolMissingDir() throws Throwable {
describe("Test the s3guard audit against a missing path");
expectExecResult(EXIT_NOT_FOUND,
authTool(),
AUDIT,
methodAuthPath.toString());
}
/**
* Touch a file in the authoritative fs.
* @param file path of file
* @throws IOException Failure
*/
protected void touchFile(final Path file) throws IOException {
ContractTestUtils.touch(authFS, file);
}
/**
* Invoke an operation expecting the meta store to be updated {@code updates}
* times and S3 LIST requests made {@code lists} times.
* @param <T> Return type
* @param updates Expected count
* @param lists Expected lists
* @param fn Function to invoke
* @return Result of the function call
* @throws Exception Failure
*/
private <T> T expectAuthoritativeUpdate(
int updates,
int lists,
Callable<T> fn)
throws Exception {
S3ATestUtils.MetricDiff authDirsMarked = new S3ATestUtils.MetricDiff(authFS,
S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED);
S3ATestUtils.MetricDiff listRequests = new S3ATestUtils.MetricDiff(authFS,
OBJECT_LIST_REQUESTS);
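// capture the counters before the call, then assert on the deltas afterwards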
final T call = fn.call();
authDirsMarked.assertDiffEquals(updates);
listRequests.assertDiffEquals(lists);
return call;
}
/**
* Assert that a listStatus call increments the
* "s3guard_metadatastore_authoritative_directories_updated" counter.
* Then checks that the directory is recursively authoritative.
* @param path path to scan
*/
private void assertListUpdatesAuth(Path path) throws Exception {
expectAuthoritativeUpdate(1, 1, () -> authFS.listStatus(path));
expectAuthRecursive(path);
}
/**
* Assert that a listStatus call does not increment the
* "s3guard_metadatastore_authoritative_directories_updated" counter.
* @param path path to scan
*/
private void assertListDoesNotUpdateAuth(Path path) throws Exception {
expectAuthoritativeUpdate(0, 0, () -> authFS.listStatus(path));
}
/**
* Create a directory if needed, then force it to be authoritatively listed.
* @param path dir
*/
private void mkAuthDir(Path path) throws IOException {
authFS.mkdirs(path);
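// a full listing is what persists the directory as authoritative; mkdirs alone does not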
authFS.listStatus(path);
}
/**
* Perform a recursive audit of the directory
* -require everything to be authoritative.
* @param path directory
*/
private void expectAuthRecursive(Path path) throws Exception {
auditor.executeAudit(path, true, true);
}
/**
* Perform a non-recursive audit of the directory
* -require the directory to be authoritative.
* @param path directory
*/
private void expectAuthNonRecursive(Path path) throws Exception {
auditor.executeAudit(path, true, false);
}
/**
* Perform a recursive audit of the directory
* -expect a failure.
* @param path directory
* @return the path returned by the exception
*/
private Path expectNonauthRecursive(Path path) throws Exception {
return intercept(
AuthoritativeAuditOperation.NonAuthoritativeDirException.class,
() -> auditor.executeAudit(path, true, true))
.getPath();
}
}

View File

@ -153,6 +153,7 @@ public class ITestS3GuardDDBRootOperations extends AbstractS3ATestBase {
Configuration conf = fs.getConf();
int result = S3GuardTool.run(conf,
S3GuardTool.Prune.NAME,
"-seconds", "1",
fsUriStr);
Assertions.assertThat(result)
.describedAs("Result of prune %s", fsUriStr)

View File

@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.Callable;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
@ -98,20 +97,17 @@ public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
getTestTableName("testInvalidRegion" + new Random().nextInt());
final String testRegion = "invalidRegion";
// Initialize MetadataStore
final Init initCmd = new Init(getFileSystem().getConf());
final Init initCmd = toClose(new Init(getFileSystem().getConf()));
intercept(IOException.class,
new Callable<String>() {
@Override
public String call() throws Exception {
int res = initCmd.run(new String[]{
"init",
"-region", testRegion,
"-meta", "dynamodb://" + testTableName
});
return "Use of invalid region did not fail, returning " + res
+ "- table may have been " +
"created and not cleaned up: " + testTableName;
}
() -> {
int res = initCmd.run(new String[]{
"init",
"-region", testRegion,
"-meta", "dynamodb://" + testTableName
});
return "Use of invalid region did not fail, returning " + res
+ "- table may have been " +
"created and not cleaned up: " + testTableName;
});
}
@ -196,15 +192,16 @@ public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
S3AFileSystem fs = getFileSystem();
DynamoDB db = null;
try {
try (Init initCmd = new Init(fs.getConf())) {
// Initialize MetadataStore
Init initCmd = new Init(fs.getConf());
expectSuccess("Init command did not exit successfully - see output",
initCmd,
Init.NAME,
"-" + READ_FLAG, "0",
"-" + WRITE_FLAG, "0",
"-" + META_FLAG, "dynamodb://" + testTableName,
testS3Url);
expectSuccess("Init command did not exit successfully - see output",
initCmd,
Init.NAME,
"-" + READ_FLAG, "0",
"-" + WRITE_FLAG, "0",
"-" + META_FLAG, "dynamodb://" + testTableName,
testS3Url);
}
// Verify it exists
MetadataStore ms = getMetadataStore();
assertTrue("metadata store should be DynamoDBMetadataStore",
@ -219,24 +216,27 @@ public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
// force in a new bucket
setBucketOption(conf, bucket, Constants.S3_METADATA_STORE_IMPL,
Constants.S3GUARD_METASTORE_DYNAMO);
initCmd = new Init(conf);
String initOutput = exec(initCmd,
"init", "-meta", "dynamodb://" + testTableName, testS3Url);
assertTrue("No Dynamo diagnostics in output " + initOutput,
initOutput.contains(DESCRIPTION));
try (Init initCmd = new Init(conf)) {
String initOutput = exec(initCmd,
"init", "-meta", "dynamodb://" + testTableName, testS3Url);
assertTrue("No Dynamo diagnostics in output " + initOutput,
initOutput.contains(DESCRIPTION));
}
// run a bucket info command and look for
// confirmation that it got the output from DDB diags
S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf);
String info = exec(infocmd, S3GuardTool.BucketInfo.NAME,
"-" + S3GuardTool.BucketInfo.GUARDED_FLAG,
testS3Url);
assertTrue("No Dynamo diagnostics in output " + info,
info.contains(DESCRIPTION));
assertTrue("No Dynamo diagnostics in output " + info,
info.contains(DESCRIPTION));
String info;
try (S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf)) {
info = exec(infocmd, BucketInfo.NAME,
"-" + BucketInfo.GUARDED_FLAG,
testS3Url);
assertTrue("No Dynamo diagnostics in output " + info,
info.contains(DESCRIPTION));
assertTrue("No Dynamo diagnostics in output " + info,
info.contains(DESCRIPTION));
}
// get the current values to set again
// play with the set-capacity option
String fsURI = getFileSystem().getUri().toString();
@ -245,23 +245,23 @@ public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
info.contains(BILLING_MODE_PER_REQUEST));
// per-request tables fail here, so expect that
intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY,
() -> exec(newSetCapacity(),
SetCapacity.NAME,
fsURI));
() -> exec(toClose(newSetCapacity()),
SetCapacity.NAME,
fsURI));
// Destroy MetadataStore
Destroy destroyCmd = new Destroy(fs.getConf());
try (Destroy destroyCmd = new Destroy(fs.getConf())){
String destroyed = exec(destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
// Verify it does not exist
assertFalse(String.format("%s still exists", testTableName),
exist(db, testTableName));
String destroyed = exec(destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
// Verify it does not exist
assertFalse(String.format("%s still exists", testTableName),
exist(db, testTableName));
// delete again and expect success again
expectSuccess("Destroy command did not exit successfully - see output",
destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
// delete again and expect success again
expectSuccess("Destroy command did not exit successfully - see output",
destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
}
} catch (ResourceNotFoundException e) {
throw new AssertionError(
String.format("DynamoDB table %s does not exist", testTableName),

View File

@ -27,7 +27,6 @@ import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.StringUtils;
@ -84,9 +83,13 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
}
}
S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
cmd.setStore(ms);
exec(cmd, "import", parent.toString());
S3GuardTool.Import cmd = toClose(new S3GuardTool.Import(fs.getConf()));
try {
cmd.setStore(ms);
exec(cmd, "import", parent.toString());
} finally {
cmd.setStore(new NullMetadataStore());
}
DirListingMetadata children =
ms.listChildren(dir);
@ -94,7 +97,7 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
.getListing().size());
assertEquals("Expected 2 items: empty directory and a parent directory", 2,
ms.listChildren(parent).getListing().size());
// assertTrue(children.isAuthoritative());
assertTrue(children.isAuthoritative());
}
@Test
@ -120,9 +123,13 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
"bogusVersionId", retrievedBogusStatus.getVersionId());
// execute the import
S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
S3GuardTool.Import cmd = toClose(new S3GuardTool.Import(fs.getConf()));
cmd.setStore(ms);
exec(cmd, "import", path.toString());
try {
exec(cmd, "import", path.toString());
} finally {
cmd.setStore(new NullMetadataStore());
}
// make sure ETag and versionId were corrected
S3AFileStatus updatedStatus = (S3AFileStatus) fs.getFileStatus(path);
@ -141,34 +148,27 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
@Test
public void testImportNoFilesystem() throws Throwable {
final Import importer =
new S3GuardTool.Import(getConfiguration());
final Import importer = toClose(new S3GuardTool.Import(getConfiguration()));
importer.setStore(getMetadataStore());
intercept(IOException.class,
new Callable<Integer>() {
@Override
public Integer call() throws Exception {
return importer.run(
new String[]{
"import",
"-meta", LOCAL_METADATA,
S3A_THIS_BUCKET_DOES_NOT_EXIST
});
}
});
try {
intercept(IOException.class,
() -> importer.run(
new String[]{
"import",
"-meta", LOCAL_METADATA,
S3A_THIS_BUCKET_DOES_NOT_EXIST
}));
} finally {
importer.setStore(new NullMetadataStore());
}
}
@Test
public void testInfoBucketAndRegionNoFS() throws Throwable {
intercept(FileNotFoundException.class,
new Callable<Integer>() {
@Override
public Integer call() throws Exception {
return run(BucketInfo.NAME, "-meta",
LOCAL_METADATA, "-region",
"any-region", S3A_THIS_BUCKET_DOES_NOT_EXIST);
}
});
() -> run(BucketInfo.NAME, "-meta",
LOCAL_METADATA, "-region",
"any-region", S3A_THIS_BUCKET_DOES_NOT_EXIST));
}
@Test
@ -230,24 +230,33 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
@Test
public void testStoreInfo() throws Throwable {
S3GuardTool.BucketInfo cmd = new S3GuardTool.BucketInfo(
getFileSystem().getConf());
S3GuardTool.BucketInfo cmd =
toClose(new S3GuardTool.BucketInfo(getFileSystem().getConf()));
cmd.setStore(getMetadataStore());
String output = exec(cmd, cmd.getName(),
"-" + S3GuardTool.BucketInfo.GUARDED_FLAG,
getFileSystem().getUri().toString());
LOG.info("Exec output=\n{}", output);
try {
String output = exec(cmd, cmd.getName(),
"-" + BucketInfo.GUARDED_FLAG,
getFileSystem().getUri().toString());
LOG.info("Exec output=\n{}", output);
} finally {
cmd.setStore(new NullMetadataStore());
}
}
@Test
public void testSetCapacity() throws Throwable {
S3GuardTool cmd = new S3GuardTool.SetCapacity(getFileSystem().getConf());
S3GuardTool cmd = toClose(
new S3GuardTool.SetCapacity(getFileSystem().getConf()));
cmd.setStore(getMetadataStore());
String output = exec(cmd, cmd.getName(),
"-" + READ_FLAG, "100",
"-" + WRITE_FLAG, "100",
getFileSystem().getUri().toString());
LOG.info("Exec output=\n{}", output);
try {
String output = exec(cmd, cmd.getName(),
"-" + READ_FLAG, "100",
"-" + WRITE_FLAG, "100",
getFileSystem().getUri().toString());
LOG.info("Exec output=\n{}", output);
} finally {
cmd.setStore(new NullMetadataStore());
}
}
private final static String UPLOAD_PREFIX = "test-upload-prefix";
@ -416,7 +425,7 @@ public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
while ((line = reader.readLine()) != null) {
String[] fields = line.split("\\s");
if (fields.length == 4 && fields[0].equals(Uploads.TOTAL)) {
int parsedUploads = Integer.valueOf(fields[1]);
int parsedUploads = Integer.parseInt(fields[1]);
LOG.debug("Matched CLI output: {} {} {} {}",
fields[0], fields[1], fields[2], fields[3]);
assertEquals("Unexpected number of uploads", numUploads,

View File

@ -1277,8 +1277,27 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
final long time,
BulkOperationState operationState) throws IOException {
PathMetadata meta = new PathMetadata(makeFileStatus(key, 1, time));
ms.put(meta,
operationState);
meta.setLastUpdated(time);
ms.put(meta, operationState);
return meta;
}
/**
* Put a dir to the shared DDB table.
* @param key key
* @param time timestamp.
* @param operationState ongoing state
* @return the entry
* @throws IOException IO failure
*/
protected PathMetadata putDir(
final String key,
final long time,
BulkOperationState operationState) throws IOException {
PathMetadata meta = new PathMetadata(
basicFileStatus(strToPath(key), 0, true, time));
meta.setLastUpdated(time);
ms.put(meta, operationState);
return meta;
}

View File

@ -24,6 +24,8 @@ import java.io.PrintStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.util.ExitCodeProvider;
import static org.junit.Assert.assertEquals;
/**
@ -40,15 +42,32 @@ public final class S3GuardToolTestHelper {
/**
* Execute a command, returning the buffer if the command actually completes.
* If an exception is raised the output is logged instead.
* If an exception is raised the output is logged before the exception is
* rethrown.
* @param cmd command
* @param args argument list
* @throws Exception on any failure
*/
public static String exec(S3GuardTool cmd, String... args) throws Exception {
return expectExecResult(0, cmd, args);
}
/**
* Execute a command, returning the buffer if the command actually completes.
* If an exception is raised which doesn't provide the exit code
* the output is logged before the exception is rethrown.
* @param expectedResult the expected result
* @param cmd command
* @param args argument list
* @throws Exception on any failure
*/
public static String expectExecResult(
final int expectedResult,
final S3GuardTool cmd,
final String... args) throws Exception {
ByteArrayOutputStream buf = new ByteArrayOutputStream();
try {
exec(0, "", cmd, buf, args);
exec(expectedResult, "", cmd, buf, args);
return buf.toString();
} catch (AssertionError e) {
throw e;
@ -65,7 +84,8 @@ public final class S3GuardToolTestHelper {
* @param cmd command
* @param buf buffer to use for tool output (not SLF4J output)
* @param args argument list
* @throws Exception on any failure
* @throws Exception on any failure other than an exception which
* implements ExitCodeProvider and whose exit code matches that expected
*/
public static void exec(final int expectedResult,
final String errorText,
@ -78,6 +98,16 @@ public final class S3GuardToolTestHelper {
try (PrintStream out = new PrintStream(buf)) {
r = cmd.run(args, out);
out.flush();
} catch (Exception ex) {
if (ex instanceof ExitCodeProvider) {
// it returns an exit code
final ExitCodeProvider ec = (ExitCodeProvider) ex;
if (ec.getExitCode() == expectedResult) {
// and the exit code matches what is expected -all is good.
return;
}
}
throw ex;
}
if (expectedResult != r) {
String message = errorText.isEmpty() ? "" : (errorText + ": ")