dstMetas,
+ Path srcPath,
+ FileStatus dstStatus) {
+ srcPaths.add(srcPath);
+ dstMetas.add(new PathMetadata(dstStatus));
+ }
+
+ /**
+ * Assert that the path is qualified with a host and scheme.
+ * @param p path to check
+ * @throws NullPointerException if the path is missing a host or scheme
+ */
+ public static void assertQualified(Path p) {
+ URI uri = p.toUri();
+ // Paths must include bucket in case MetadataStore is shared between
+ // multiple S3AFileSystem instances
+ Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri);
+
+ // This should never fail, but is retained for completeness.
+ Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
+ }
+
+ /**
+ * Assert that all paths are valid.
+ * @param paths paths to check
+ * @throws NullPointerException if any path is not qualified
+ */
+ public static void assertQualified(Path...paths) {
+ for (Path path : paths) {
+ assertQualified(path);
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
new file mode 100644
index 00000000000..be271ae5eca
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -0,0 +1,924 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.shell.CommandFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
+/**
+ * CLI to manage S3Guard Metadata Store.
+ */
+public abstract class S3GuardTool extends Configured implements Tool {
+ private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
+
+ private static final String NAME = "s3guard";
+ private static final String COMMON_USAGE =
+ "When possible and not overridden by more specific options, metadata\n" +
+ "repository information will be inferred from the S3A URL (if provided)" +
+ "\n\n" +
+ "Generic options supported are:\n" +
+ " -conf - specify an application configuration file\n" +
+ " -D - define a value for a given property\n";
+
+ private static final String USAGE = NAME +
+ " [command] [OPTIONS] [s3a://BUCKET]\n\n" +
+ "Commands: \n" +
+ "\t" + Init.NAME + " - " + Init.PURPOSE + "\n" +
+ "\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" +
+ "\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
+ "\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
+ "\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
+ private static final String DATA_IN_S3_IS_PRESERVED
+ = "(all data in S3 is preserved";
+
+ public abstract String getUsage();
+
+ // Exit codes
+ static final int SUCCESS = 0;
+ static final int INVALID_ARGUMENT = 1;
+ static final int ERROR = 99;
+
+ private S3AFileSystem filesystem;
+ private MetadataStore store;
+ private final CommandFormat commandFormat;
+
+ private static final String META_FLAG = "meta";
+ private static final String DAYS_FLAG = "days";
+ private static final String HOURS_FLAG = "hours";
+ private static final String MINUTES_FLAG = "minutes";
+ private static final String SECONDS_FLAG = "seconds";
+
+ private static final String REGION_FLAG = "region";
+ private static final String READ_FLAG = "read";
+ private static final String WRITE_FLAG = "write";
+
+ /**
+ * Construct an S3Guard tool with the given Hadoop configuration.
+ * @param conf Configuration.
+ */
+ protected S3GuardTool(Configuration conf) {
+ super(conf);
+
+ commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
+ // For metadata store URI
+ commandFormat.addOptionWithValue(META_FLAG);
+ // DDB region.
+ commandFormat.addOptionWithValue(REGION_FLAG);
+ }
+
+ /**
+ * Return sub-command name.
+ */
+ abstract String getName();
+
+ /**
+ * Parse the DynamoDB region from either the -region option or an S3 path.
+ *
+ * This function should only be called from {@link Init} or
+ * {@link Destroy}.
+ *
+ * @param paths remaining parameters from CLI.
+ * @return false for invalid parameters.
+ * @throws IOException on I/O errors.
+ */
+ boolean parseDynamoDBRegion(List<String> paths) throws IOException {
+ Configuration conf = getConf();
+ String fromCli = getCommandFormat().getOptValue(REGION_FLAG);
+ String fromConf = conf.get(S3GUARD_DDB_REGION_KEY);
+ boolean hasS3Path = !paths.isEmpty();
+
+ if (fromCli != null) {
+ if (fromCli.isEmpty()) {
+ System.err.println("No region provided with -" + REGION_FLAG + " flag");
+ return false;
+ }
+ if (hasS3Path) {
+ System.err.println("Providing both an S3 path and the -" + REGION_FLAG
+ + " flag is not supported. If you need to specify a different "
+ + "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY);
+ return false;
+ }
+ conf.set(S3GUARD_DDB_REGION_KEY, fromCli);
+ return true;
+ }
+
+ if (fromConf != null) {
+ if (fromConf.isEmpty()) {
+ System.err.printf("No region provided with config %s, %n",
+ S3GUARD_DDB_REGION_KEY);
+ return false;
+ }
+ return true;
+ }
+
+ if (hasS3Path) {
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+ return true;
+ }
+
+ System.err.println("No region found from -" + REGION_FLAG + " flag, " +
+ "config, or S3 bucket");
+ return false;
+ }
+
+ /**
+ * Parse metadata store from command line option or HDFS configuration.
+ *
+ * @param forceCreate override the auto-creation setting to true.
+ * @return an initialized metadata store.
+ * @throws IOException if the metadata store cannot be created or initialized.
+ */
+ MetadataStore initMetadataStore(boolean forceCreate) throws IOException {
+ if (getStore() != null) {
+ return getStore();
+ }
+ Configuration conf;
+ if (filesystem == null) {
+ conf = getConf();
+ } else {
+ conf = filesystem.getConf();
+ }
+ String metaURI = getCommandFormat().getOptValue(META_FLAG);
+ if (metaURI != null && !metaURI.isEmpty()) {
+ URI uri = URI.create(metaURI);
+ LOG.info("create metadata store: {}", uri + " scheme: "
+ + uri.getScheme());
+ switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) {
+ case "local":
+ setStore(new LocalMetadataStore());
+ break;
+ case "dynamodb":
+ setStore(new DynamoDBMetadataStore());
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
+ if (forceCreate) {
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ }
+ break;
+ default:
+ throw new IOException(
+ String.format("Metadata store %s is not supported", uri));
+ }
+ } else {
+ // No metadata store URI was specified on the CLI; fall back to the
+ // default store, DynamoDB.
+ setStore(new DynamoDBMetadataStore());
+ if (forceCreate) {
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ }
+ }
+
+ if (filesystem == null) {
+ getStore().initialize(conf);
+ } else {
+ getStore().initialize(filesystem);
+ }
+ LOG.info("Metadata store {} is initialized.", getStore());
+ return getStore();
+ }
+
+ /**
+ * Initialize S3A FileSystem instance.
+ *
+ * @param path s3a URI
+ * @throws IOException if the URI is invalid or the filesystem cannot be created.
+ */
+ void initS3AFileSystem(String path) throws IOException {
+ URI uri;
+ try {
+ uri = new URI(path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ // Bind the filesystem to the null metadata store, so that the CLI can
+ // manage the real MetadataStore itself.
+ Configuration conf = getConf();
+ conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+ MetadataStore.class);
+ FileSystem fs = FileSystem.get(uri, getConf());
+ if (!(fs instanceof S3AFileSystem)) {
+ throw new IOException(
+ String.format("URI %s is not a S3A file system: %s", uri,
+ fs.getClass().getName()));
+ }
+ filesystem = (S3AFileSystem) fs;
+ }
+
+ /**
+ * Parse CLI arguments and return the positional arguments.
+ * The options are stored in {@link #commandFormat}.
+ *
+ * @param args command line arguments.
+ * @return the positional arguments from the CLI.
+ */
+ List<String> parseArgs(String[] args) {
+ return getCommandFormat().parse(args, 1);
+ }
+
+ protected S3AFileSystem getFilesystem() {
+ return filesystem;
+ }
+
+ protected void setFilesystem(S3AFileSystem filesystem) {
+ this.filesystem = filesystem;
+ }
+
+ @VisibleForTesting
+ public MetadataStore getStore() {
+ return store;
+ }
+
+ @VisibleForTesting
+ protected void setStore(MetadataStore store) {
+ Preconditions.checkNotNull(store);
+ this.store = store;
+ }
+
+ protected CommandFormat getCommandFormat() {
+ return commandFormat;
+ }
+
+ /**
+ * Create the metadata store.
+ */
+ static class Init extends S3GuardTool {
+ private static final String NAME = "init";
+ public static final String PURPOSE = "initialize metadata repository";
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ " -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
+ " -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Init(Configuration conf) {
+ super(conf);
+ // read capacity.
+ getCommandFormat().addOptionWithValue(READ_FLAG);
+ // write capacity.
+ getCommandFormat().addOptionWithValue(WRITE_FLAG);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+
+ String readCap = getCommandFormat().getOptValue(READ_FLAG);
+ if (readCap != null && !readCap.isEmpty()) {
+ int readCapacity = Integer.parseInt(readCap);
+ getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
+ }
+ String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
+ if (writeCap != null && !writeCap.isEmpty()) {
+ int writeCapacity = Integer.parseInt(writeCap);
+ getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
+ }
+
+ // Validate parameters.
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ initMetadataStore(true);
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Destroy a metadata store.
+ */
+ static class Destroy extends S3GuardTool {
+ private static final String NAME = "destroy";
+ public static final String PURPOSE = "destroy Metadata Store data "
+ + DATA_IN_S3_IS_PRESERVED;
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Destroy(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+
+ try {
+ initMetadataStore(false);
+ } catch (FileNotFoundException e) {
+ // indication that the table was not found
+ LOG.debug("Failed to bind to store to be destroyed", e);
+ LOG.info("Metadata Store does not exist.");
+ return SUCCESS;
+ }
+
+ Preconditions.checkState(getStore() != null,
+ "Metadata Store is not initialized");
+
+ getStore().destroy();
+ LOG.info("Metadata store is deleted.");
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Import s3 metadata to the metadata store.
+ */
+ static class Import extends S3GuardTool {
+ private static final String NAME = "import";
+ public static final String PURPOSE = "import metadata from existing S3 " +
+ "data";
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ private final Set<Path> dirCache = new HashSet<>();
+
+ Import(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ /**
+ * Put the parents of a file or empty directory into the MetadataStore and
+ * the cache, if they are not already present.
+ *
+ * @param f the file or an empty directory.
+ * @throws IOException on I/O errors.
+ */
+ private void putParentsIfNotPresent(FileStatus f) throws IOException {
+ Preconditions.checkNotNull(f);
+ Path parent = f.getPath().getParent();
+ while (parent != null) {
+ if (dirCache.contains(parent)) {
+ return;
+ }
+ FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
+ f.getOwner());
+ getStore().put(new PathMetadata(dir));
+ dirCache.add(parent);
+ parent = parent.getParent();
+ }
+ }
+
+ /**
+ * Recursively import every path under the given directory into the
+ * MetadataStore.
+ * @param status status of the directory to import.
+ * @return number of items inserted into the MetadataStore.
+ * @throws IOException on I/O errors.
+ */
+ private long importDir(FileStatus status) throws IOException {
+ Preconditions.checkArgument(status.isDirectory());
+ RemoteIterator<LocatedFileStatus> it = getFilesystem()
+ .listFilesAndEmptyDirectories(status.getPath(), true);
+ long items = 0;
+
+ while (it.hasNext()) {
+ LocatedFileStatus located = it.next();
+ FileStatus child;
+ if (located.isDirectory()) {
+ child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
+ located.getOwner());
+ dirCache.add(child.getPath());
+ } else {
+ child = new S3AFileStatus(located.getLen(),
+ located.getModificationTime(),
+ located.getPath(),
+ located.getBlockSize(),
+ located.getOwner());
+ }
+ putParentsIfNotPresent(child);
+ getStore().put(new PathMetadata(child));
+ items++;
+ }
+ return items;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (paths.isEmpty()) {
+ System.err.println(getUsage());
+ return INVALID_ARGUMENT;
+ }
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+
+ URI uri;
+ try {
+ uri = new URI(s3Path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ String filePath = uri.getPath();
+ if (filePath.isEmpty()) {
+ // If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider
+ // root to be the path
+ filePath = "/";
+ }
+ Path path = new Path(filePath);
+ FileStatus status = getFilesystem().getFileStatus(path);
+
+ initMetadataStore(false);
+
+ long items = 1;
+ if (status.isFile()) {
+ PathMetadata meta = new PathMetadata(status);
+ getStore().put(meta);
+ } else {
+ items = importDir(status);
+ }
+
+ System.out.printf("Inserted %d items into Metadata Store%n", items);
+
+ return SUCCESS;
+ }
+ }
+
+ /**
+ * Show diffs between S3 and the metadata store.
+ */
+ static class Diff extends S3GuardTool {
+ private static final String NAME = "diff";
+ public static final String PURPOSE = "report on delta between S3 and " +
+ "repository";
+ private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ private static final String SEP = "\t";
+ static final String S3_PREFIX = "S3";
+ static final String MS_PREFIX = "MS";
+
+ Diff(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ /**
+ * Format the output for printing a FileStatus in the S3Guard diff tool.
+ * @param status the status to print.
+ * @return the string of output.
+ */
+ private static String formatFileStatus(FileStatus status) {
+ return String.format("%s%s%d%s%s",
+ status.isDirectory() ? "D" : "F",
+ SEP,
+ status.getLen(),
+ SEP,
+ status.getPath().toString());
+ }
+
+ /**
+ * Compare the metadata of two FileStatus instances to see if they differ.
+ * @param thisOne first status to compare.
+ * @param thatOne second status to compare.
+ * @return true if the metadata is not identical
+ */
+ private static boolean differ(FileStatus thisOne, FileStatus thatOne) {
+ Preconditions.checkArgument(!(thisOne == null && thatOne == null));
+ return (thisOne == null || thatOne == null) ||
+ (thisOne.getLen() != thatOne.getLen()) ||
+ (thisOne.isDirectory() != thatOne.isDirectory()) ||
+ (!thisOne.isDirectory() &&
+ thisOne.getModificationTime() != thatOne.getModificationTime());
+ }
+
+ /**
+ * Print difference, if any, between two file statuses to the output stream.
+ *
+ * @param msStatus file status from metadata store.
+ * @param s3Status file status from S3.
+ * @param out output stream.
+ */
+ private static void printDiff(FileStatus msStatus,
+ FileStatus s3Status,
+ PrintStream out) {
+ Preconditions.checkArgument(!(msStatus == null && s3Status == null));
+ if (msStatus != null && s3Status != null) {
+ Preconditions.checkArgument(
+ msStatus.getPath().equals(s3Status.getPath()),
+ String.format("The path from metadata store and s3 are different:" +
+ " ms=%s s3=%s", msStatus.getPath(), s3Status.getPath()));
+ }
+
+ if (differ(msStatus, s3Status)) {
+ if (s3Status != null) {
+ out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status));
+ }
+ if (msStatus != null) {
+ out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus));
+ }
+ }
+ }
+
+ /**
+ * Compare the metadata of the directory with the same path, on S3 and
+ * the metadata store, respectively. If one of them is null, the metadata of
+ * the directory and all its subdirectories is considered missing from that
+ * source.
+ *
+ * Pass the FileStatus obtained from S3 and the metadata store to avoid one
+ * round trip to fetch the same metadata twice, because the FileStatus
+ * instances have already been obtained from listStatus() / listChildren()
+ * operations.
+ *
+ * @param msDir the directory FileStatus obtained from the metadata store.
+ * @param s3Dir the directory FileStatus obtained from S3.
+ * @param out the output stream to generate diff results.
+ * @throws IOException on I/O errors.
+ */
+ private void compareDir(FileStatus msDir, FileStatus s3Dir,
+ PrintStream out) throws IOException {
+ Preconditions.checkArgument(!(msDir == null && s3Dir == null));
+ if (msDir != null && s3Dir != null) {
+ Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
+ String.format("The path from metadata store and s3 are different:" +
+ " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
+ }
+
+ Map<Path, FileStatus> s3Children = new HashMap<>();
+ if (s3Dir != null && s3Dir.isDirectory()) {
+ for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) {
+ s3Children.put(status.getPath(), status);
+ }
+ }
+
+ Map<Path, FileStatus> msChildren = new HashMap<>();
+ if (msDir != null && msDir.isDirectory()) {
+ DirListingMetadata dirMeta =
+ getStore().listChildren(msDir.getPath());
+
+ if (dirMeta != null) {
+ for (PathMetadata meta : dirMeta.getListing()) {
+ FileStatus status = meta.getFileStatus();
+ msChildren.put(status.getPath(), status);
+ }
+ }
+ }
+
+ Set<Path> allPaths = new HashSet<>(s3Children.keySet());
+ allPaths.addAll(msChildren.keySet());
+
+ for (Path path : allPaths) {
+ FileStatus s3Status = s3Children.get(path);
+ FileStatus msStatus = msChildren.get(path);
+ printDiff(msStatus, s3Status, out);
+ if ((s3Status != null && s3Status.isDirectory()) ||
+ (msStatus != null && msStatus.isDirectory())) {
+ compareDir(msStatus, s3Status, out);
+ }
+ }
+ out.flush();
+ }
+
+ /**
+ * Compare both metadata store and S3 on the same path.
+ *
+ * @param path the path to be compared.
+ * @param out the output stream to display results.
+ * @throws IOException on I/O errors.
+ */
+ private void compareRoot(Path path, PrintStream out) throws IOException {
+ Path qualified = getFilesystem().qualify(path);
+ FileStatus s3Status = null;
+ try {
+ s3Status = getFilesystem().getFileStatus(qualified);
+ } catch (FileNotFoundException e) {
+ // the path is not present in S3; leave s3Status as null so the diff
+ // is computed against the metadata store entry alone.
+ }
+ PathMetadata meta = getStore().get(qualified);
+ FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
+ meta.getFileStatus() : null;
+ compareDir(msStatus, s3Status, out);
+ }
+
+ @VisibleForTesting
+ public int run(String[] args, PrintStream out) throws IOException {
+ List<String> paths = parseArgs(args);
+ if (paths.isEmpty()) {
+ out.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ String s3Path = paths.get(0);
+ initS3AFileSystem(s3Path);
+ initMetadataStore(true);
+
+ URI uri;
+ try {
+ uri = new URI(s3Path);
+ } catch (URISyntaxException e) {
+ throw new IOException(e);
+ }
+ Path root;
+ if (uri.getPath().isEmpty()) {
+ root = new Path("/");
+ } else {
+ root = new Path(uri.getPath());
+ }
+ root = getFilesystem().qualify(root);
+ compareRoot(root, out);
+ out.flush();
+ return SUCCESS;
+ }
+
+ @Override
+ public int run(String[] args) throws IOException {
+ return run(args, System.out);
+ }
+ }
+
+ /**
+ * Prune metadata that has not been modified recently.
+ */
+ static class Prune extends S3GuardTool {
+ private static final String NAME = "prune";
+ public static final String PURPOSE = "truncate older metadata from " +
+ "repository "
+ + DATA_IN_S3_IS_PRESERVED;
+ private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+ "\t" + PURPOSE + "\n\n" +
+ "Common options:\n" +
+ " -" + META_FLAG + " URL - Metadata repository details " +
+ "(implementation-specific)\n" +
+ "\n" +
+ "Amazon DynamoDB-specific options:\n" +
+ " -" + REGION_FLAG + " REGION - Service region for connections\n" +
+ "\n" +
+ " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+ " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+ " is not supported.";
+
+ Prune(Configuration conf) {
+ super(conf);
+
+ CommandFormat format = getCommandFormat();
+ format.addOptionWithValue(DAYS_FLAG);
+ format.addOptionWithValue(HOURS_FLAG);
+ format.addOptionWithValue(MINUTES_FLAG);
+ format.addOptionWithValue(SECONDS_FLAG);
+ }
+
+ @VisibleForTesting
+ void setMetadataStore(MetadataStore ms) {
+ Preconditions.checkNotNull(ms);
+ this.setStore(ms);
+ }
+
+ @Override
+ String getName() {
+ return NAME;
+ }
+
+ @Override
+ public String getUsage() {
+ return USAGE;
+ }
+
+ private long getDeltaComponent(TimeUnit unit, String arg) {
+ String raw = getCommandFormat().getOptValue(arg);
+ if (raw == null || raw.isEmpty()) {
+ return 0;
+ }
+ Long parsed = Long.parseLong(raw);
+ return unit.toMillis(parsed);
+ }
+
+ @VisibleForTesting
+ public int run(String[] args, PrintStream out) throws
+ InterruptedException, IOException {
+ List<String> paths = parseArgs(args);
+ if (!parseDynamoDBRegion(paths)) {
+ System.err.println(USAGE);
+ return INVALID_ARGUMENT;
+ }
+ initMetadataStore(false);
+
+ Configuration conf = getConf();
+ long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
+
+ long cliDelta = 0;
+ cliDelta += getDeltaComponent(TimeUnit.DAYS, "days");
+ cliDelta += getDeltaComponent(TimeUnit.HOURS, "hours");
+ cliDelta += getDeltaComponent(TimeUnit.MINUTES, "minutes");
+ cliDelta += getDeltaComponent(TimeUnit.SECONDS, "seconds");
+
+ if (confDelta <= 0 && cliDelta <= 0) {
+ System.err.println(
+ "You must specify a positive age for metadata to prune.");
+ return INVALID_ARGUMENT;
+ }
+
+ // A delta provided on the CLI overrides if one is configured
+ long delta = confDelta;
+ if (cliDelta > 0) {
+ delta = cliDelta;
+ }
+
+ long now = System.currentTimeMillis();
+ long divide = now - delta;
+
+ getStore().prune(divide);
+
+ out.flush();
+ return SUCCESS;
+ }
+
+ @Override
+ public int run(String[] args) throws InterruptedException, IOException {
+ return run(args, System.out);
+ }
+ }
+
+ private static S3GuardTool command;
+
+ private static void printHelp() {
+ if (command == null) {
+ System.err.println("Usage: hadoop " + USAGE);
+ System.err.println("\tperform S3Guard metadata store " +
+ "administrative commands.");
+ } else {
+ System.err.println("Usage: hadoop " + command.getUsage());
+ }
+ System.err.println();
+ System.err.println(COMMON_USAGE);
+ }
+
+ /**
+ * Execute the command with the given arguments.
+ *
+ * @param args command specific arguments.
+ * @param conf Hadoop configuration.
+ * @return exit code.
+ * @throws Exception on I/O errors.
+ */
+ public static int run(String[] args, Configuration conf) throws
+ Exception {
+ /* ToolRunner.run does this too, but we must do it before looking at
+ subCommand or instantiating the cmd object below */
+ String[] otherArgs = new GenericOptionsParser(conf, args)
+ .getRemainingArgs();
+ if (otherArgs.length == 0) {
+ printHelp();
+ return INVALID_ARGUMENT;
+ }
+ final String subCommand = otherArgs[0];
+ switch (subCommand) {
+ case Init.NAME:
+ command = new Init(conf);
+ break;
+ case Destroy.NAME:
+ command = new Destroy(conf);
+ break;
+ case Import.NAME:
+ command = new Import(conf);
+ break;
+ case Diff.NAME:
+ command = new Diff(conf);
+ break;
+ case Prune.NAME:
+ command = new Prune(conf);
+ break;
+ default:
+ printHelp();
+ return INVALID_ARGUMENT;
+ }
+ return ToolRunner.run(conf, command, otherArgs);
+ }
+
+ /**
+ * Main entry point. Calls {@code System.exit()} on all execution paths.
+ * @param args argument list
+ */
+ public static void main(String[] args) {
+ try {
+ int ret = run(args, new Configuration());
+ System.exit(ret);
+ } catch (CommandFormat.UnknownOptionException e) {
+ System.err.println(e.getMessage());
+ printHelp();
+ System.exit(INVALID_ARGUMENT);
+ } catch (Throwable e) {
+ e.printStackTrace(System.err);
+ System.exit(ERROR);
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
new file mode 100644
index 00000000000..d4303150d1d
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains classes related to S3Guard: a feature of S3A to mask
+ * the eventual consistency behavior of S3 and optimize access patterns by
+ * coordinating with a strongly consistent external store for file system
+ * metadata.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
index 862ce6bec28..ce792849def 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
@@ -105,6 +105,10 @@ public final class S3xLoginHelper {
* @return a login tuple, possibly empty.
*/
public static Login extractLoginDetails(URI name) {
+ if (name == null) {
+ return Login.EMPTY;
+ }
+
try {
String authority = name.getAuthority();
if (authority == null) {
diff --git a/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh
new file mode 100644
index 00000000000..039b0772e76
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if ! declare -f hadoop_subcommand_s3guard >/dev/null 2>/dev/null; then
+
+ if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
+ hadoop_add_subcommand "s3guard" client "manage metadata on S3"
+ fi
+
+ # this can't be indented otherwise shelldocs won't get it
+
+## @description s3guard command for hadoop
+## @audience public
+## @stability stable
+## @replaceable yes
+function hadoop_subcommand_s3guard
+{
+ # shellcheck disable=SC2034
+ HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool
+ hadoop_add_to_classpath_tools hadoop-aws
+}
+
+fi
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 182f0607eaa..b8d37c633ce 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -46,6 +46,7 @@ See also:
* [Testing](testing.html)
* [Troubleshooting S3a](troubleshooting_s3a.html)
+* [S3Guard](s3guard.html)
### Warning #1: Object Stores are not filesystems
@@ -1552,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
encrypt the objects. The user may specify a specific CMK or leave the
-`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
+`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
used in an S3 bucket in a different region. There can also be policies
assigned to the CMK that prohibit or restrict its use for users causing S3A
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
new file mode 100644
index 00000000000..fe67d6954f2
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -0,0 +1,610 @@
+
+
+# S3Guard: Consistency and Metadata Caching for S3A
+
+**Experimental Feature**
+
+
+
+## Overview
+
+*S3Guard* is an experimental feature for the S3A client of the S3 object store,
+which can use a (consistent) database as the store of metadata about objects
+in an S3 bucket.
+
+S3Guard
+
+1. May improve performance on directory listing/scanning operations,
+including those which take place during the partitioning period of query
+execution, the process where files are listed and the work divided up amongst
+processes.
+
+1. Permits a consistent view of the object store. Without this, changes in
+objects may not be immediately visible, especially in listing operations.
+
+1. Offers a platform for future performance improvements for running Hadoop
+workloads on top of object stores
+
+The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
+reads or modifies metadata, a shadow copy of that metadata is stored in a
+separate MetadataStore implementation. Each MetadataStore implementation
+offers HDFS-like consistency for the metadata, and may also provide faster
+lookups for things like file status or directory listings.
+
+For links to early design documents and related patches, see
+[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
+
+*Important*
+
+* S3Guard is experimental and should be considered unstable.
+
+* While all underlying data is persisted in S3, if, for some reason,
+the S3Guard-cached metadata becomes inconsistent with that in S3,
+queries on the data may become incorrect.
+For example, new datasets may be omitted, objects may be overwritten,
+or clients may not be aware that some data has been deleted.
+It is essential for all clients writing to an S3Guard-enabled
+S3 Repository to use the feature. Clients reading the data may work directly
+with the S3A data, in which case the normal S3 consistency guarantees apply.
+
+
+## Setting up S3Guard
+
+The latest configuration parameters are defined in `core-default.xml`. You
+should consult that file for full information, but a summary is provided here.
+
+
+### 1. Choose the Database
+
+A core concept of S3Guard is that the directory listing data of the object
+store, *the metadata*, is replicated in a higher-performance, consistent
+database. In S3Guard, this database is called *The Metadata Store*.
+
+By default, S3Guard is not enabled.
+
+The Metadata Store to use in production is bonded to Amazon's DynamoDB
+database service. The following setting will enable this Metadata Store:
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Note that the `NullMetadataStore` store can be explicitly requested if desired.
+This offers no metadata storage, and effectively disables S3Guard.
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+```
+
+### 2. Configure S3Guard Settings
+
+More settings may be added in the future.
+Currently the only Metadata Store-independent setting, besides the
+implementation class above, is the *allow authoritative* flag.
+
+It is recommended that you leave the default setting here:
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.authoritative</name>
+  <value>false</value>
+</property>
+```
+
+Setting this to `true` is currently an experimental feature. When true, the
+S3A client will avoid round-trips to S3 when getting directory listings, if
+there is a fully-cached version of the directory stored in the Metadata Store.
+
+Note that if this is set to true, it may exacerbate or persist existing race
+conditions around multiple concurrent modifications and listings of a given
+directory tree.
+
+In particular: **If the Metadata Store is declared as authoritative,
+all interactions with the S3 bucket(s) must be through S3A clients sharing
+the same Metadata Store**
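+
+For illustration only, and assuming the caveat above is satisfied (every
+client shares the same Metadata Store), enabling authoritative mode is a
+matter of flipping the same flag to `true`:
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.authoritative</name>
+  <value>true</value>
+</property>
+```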
+
+
+### 3. Configure the Metadata Store.
+
+Here are the `DynamoDBMetadataStore` settings. Other Metadata Store
+implementations will have their own configuration parameters.
+
+
+### 4. Name Your Table
+
+First, choose the name of the table you wish to use for the S3Guard metadata
+storage in your DynamoDB instance. If you leave it unset/empty, a
+separate table will be created for each S3 bucket you access, and that
+bucket's name will be used for the name of the DynamoDB table. For example,
+this sets the table name to `my-ddb-table-name`
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-ddb-table-name</value>
+  <description>
+    The DynamoDB table name to operate. Without this property, the respective
+    S3 bucket names will be used.
+  </description>
+</property>
+```
+
+It is good to share a table across multiple buckets for multiple reasons.
+
+1. You are billed for the I/O capacity allocated to the table,
+*even when the table is not used*. Sharing capacity can reduce costs.
+
+1. You can share the "provision burden" across the buckets. That is, rather
+than allocating for the peak load on a single bucket, you can allocate for
+the peak load *across all the buckets*, which is likely to be significantly
+lower.
+
+1. It's easier to measure and tune the load requirements and cost of
+S3Guard, because there is only one table to review and configure in the
+AWS management console.
+
+When wouldn't you want to share a table?
+
+1. When you do explicitly want to provision I/O capacity to a specific bucket
+and table, isolated from others.
+
+1. When you are using separate billing for specific buckets allocated
+to specific projects.
+
+1. When different users/roles have different access rights to different buckets.
+As S3Guard requires all users to have R/W access to the table, all users will
+be able to list the metadata in all buckets, even those to which they lack
+read access.
+
+### 5. Locate your Table
+
+You may also wish to specify the region to use for DynamoDB. If a region
+is not configured, S3A will assume that it is in the same region as the S3
+bucket. A list of regions for the DynamoDB service can be found in
+[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
+In this example, to use the US West 2 region:
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+```
+
+When working with S3Guard-managed buckets from EC2 VMs running in AWS
+infrastructure, using a local DynamoDB region ensures the lowest latency
+and highest reliability, as well as avoiding all long-haul network charges.
+The S3Guard tables, and indeed, the S3 buckets, should all be in the same
+region as the VMs.
+
+### 6. Optional: Create your Table
+
+Next, you can choose whether or not the table will be automatically created
+(if it doesn't already exist). If you want this feature, set the
+`fs.s3a.s3guard.ddb.table.create` option to `true`.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.create</name>
+  <value>true</value>
+  <description>
+    If true, the S3A client will create the table if it does not already exist.
+  </description>
+</property>
+```
+
+### 7. If creating a table: Set your DynamoDB IO Capacity
+
+Next, you need to set the DynamoDB read and write throughput requirements you
+expect to need for your cluster. Setting higher values will cost you more
+money. *Note* that these settings only affect table creation when
+`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
+an existing table, use the AWS console or CLI tool.
+
+For more details on DynamoDB capacity units, see the AWS page on [Capacity
+Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
+
+The charges are incurred per hour for the life of the table, *even when the
+table and the underlying S3 buckets are not being used*.
+
+There are also charges incurred for data storage and for data IO outside of the
+region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path names
+and summary details of objects — the actual data is stored in S3, so billed at S3
+rates.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+  <value>500</value>
+  <description>
+    Provisioned throughput requirements for read operations in terms of capacity
+    units for the DynamoDB table. This config value will only be used when
+    creating a new DynamoDB table, though later you can manually provision by
+    increasing or decreasing read capacity as needed for existing tables.
+    See DynamoDB documents for more information.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+  <value>100</value>
+  <description>
+    Provisioned throughput requirements for write operations in terms of
+    capacity units for the DynamoDB table. Refer to related config
+    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+  </description>
+</property>
+```
+
+Attempting to perform more IO than the capacity requested simply throttles the
+IO; small capacity numbers are recommended when initially experimenting
+with S3Guard.
+
+## Authenticating with S3Guard
+
+The DynamoDB metadata store takes advantage of the fact that the DynamoDB
+service uses the same authentication mechanisms as S3. S3Guard
+gets all its credentials from the S3A client that is using it.
+
+All existing S3 authentication mechanisms can be used, with one exception:
+credentials placed in URIs are not supported for S3Guard, for security
+reasons.
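+
+For example, if the S3A client is configured with static access keys in the
+Hadoop configuration, the DynamoDB metadata store will pick up and reuse those
+same values. A minimal sketch follows; the property names are the standard
+S3A credential settings, and the values are placeholders:
+
+```xml
+<!-- Placeholder values: S3Guard reuses whatever credentials S3A is configured with. -->
+<property>
+  <name>fs.s3a.access.key</name>
+  <value>YOUR_ACCESS_KEY_ID</value>
+</property>
+
+<property>
+  <name>fs.s3a.secret.key</name>
+  <value>YOUR_SECRET_ACCESS_KEY</value>
+</property>
+```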
+
+## Per-bucket S3Guard configuration
+
+In production, it is likely only some buckets will have S3Guard enabled;
+those which are read-only may have it disabled, for example. Equally importantly,
+buckets in different regions should have different tables, each
+in the relevant region.
+
+These options can be managed through S3A's [per-bucket configuration
+mechanism](./index.html#Configuring_different_S3_buckets).
+All options set under `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
+to the options `fs.s3a.KEY` *for that bucket only*.
+
+As an example, here is a configuration to use different metadata stores
+and tables for different buckets
+
+First, we define shortcuts for the metadata store classnames
+
+
+```xml
+<property>
+  <name>s3guard.null</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+
+<property>
+  <name>s3guard.dynamo</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Next, Amazon's public landsat database is configured with no
+metadata store
+
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+  <value>${s3guard.null}</value>
+  <description>The read-only landsat-pds repository isn't
+    managed by S3Guard</description>
+</property>
+```
+
+Next the `ireland-2` and `ireland-offline` buckets are configured with
+DynamoDB as the store, and a shared table `production-table`
+
+
+```xml
+<property>
+  <name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
+  <value>production-table</value>
+</property>
+```
+
+The region of this table is automatically set to be that of the buckets,
+here `eu-west-1`; the same table name may actually be used in different
+regions.
+
+Together then, this configuration enables the DynamoDB Metadata Store
+for two buckets with a shared table, while disabling it for the public
+bucket.
+
+
+## S3Guard Command Line Interface (CLI)
+
+Note that in some cases an AWS region or `s3a://` URI can be provided.
+
+Metadata store URIs include a scheme that designates the backing store, for
+example `dynamodb://table_name`. As documented above, the
+AWS region can be inferred if the URI to an existing bucket is provided.
+
+
+The S3A URI must also be provided for per-bucket configuration options
+to be picked up. That is: when an s3a URL is provided on the command line,
+all its "resolved" per-bucket settings are used to connect to, authenticate
+with and configure the S3Guard table. If no such URL is provided, then
+the base settings are picked up.
+
+
+### Create a table: `s3guard init`
+
+```bash
+hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
+```
+
+Creates and initializes an empty metadata store.
+
+A DynamoDB metadata store can be initialized with additional parameters
+pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
+
+```bash
+[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
+```
+
+Example 1
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
+```
+
+Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
+in the same location as the bucket "ireland-1".
+
+
+Example 2
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Creates a table "ireland-team" in the same region "s3-eu-west-1.amazonaws.com"
+
+
+### Import a bucket: `s3guard import`
+
+```bash
+hadoop s3guard import [-meta URI] s3a://BUCKET
+```
+
+Pre-populates a metadata store according to the current contents of an S3
+bucket. If the `-meta` option is omitted, the binding information is taken
+from the `core-site.xml` configuration.
+
+Example
+
+```bash
+hadoop s3guard import s3a://ireland-1
+```
+
+### Audit a table: `s3guard diff`
+
+```bash
+hadoop s3guard diff [-meta URI] s3a://BUCKET
+```
+
+Lists discrepancies between a metadata store and bucket. Note that depending on
+how S3Guard is used, certain discrepancies are to be expected.
+
+Example
+
+```bash
+hadoop s3guard diff s3a://ireland-1
+```
+
+### Delete a table: `s3guard destroy`
+
+
+Deletes a metadata store. With DynamoDB as the store, this means
+the specific DynamoDB table used to store the metadata.
+
+```bash
+hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
+```
+
+This *does not* delete the bucket, only the S3Guard table which it is bound
+to.
+
+
+Examples
+
+```bash
+hadoop s3guard destroy s3a://ireland-1
+```
+
+Deletes the table which the bucket ireland-1 is configured to use
+as its MetadataStore.
+
+```bash
+hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
+```
+
+
+
+### Clean up a table, `s3guard prune`
+
+Delete all file entries in the MetadataStore table whose object "modification
+time" is older than the specified age.
+
+```bash
+hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
+ [-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
+```
+
+A time value must be supplied.
+
+1. This does not delete the entries in the bucket itself.
+1. The modification time is effectively the creation time of the objects
+in the S3 Bucket.
+1. Even when an S3A URI is supplied, all entries in the table older than
+a specific age are deleted — even those from other buckets.
+
+Example
+
+```bash
+hadoop s3guard prune -days 7 s3a://ireland-1
+```
+
+Deletes all entries in the S3Guard table for files older than seven days from
+the table associated with `s3a://ireland-1`.
+
+```bash
+hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Delete all entries more than 90 minutes old from the table "ireland-team" in
+the region "eu-west-1".
+
+
+
+## Debugging and Error Handling
+
+If you run into network connectivity issues, or have a machine failure in the
+middle of an operation, you may end up with your metadata store having state
+that differs from S3. The S3Guard CLI commands, covered in the CLI section
+above, can be used to diagnose and repair these issues.
+
+There are some logs whose log level can be increased to provide more
+information.
+
+```properties
+# Log S3Guard classes
+log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+
+# Log all S3A classes
+log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+
+# Enable debug logging of AWS DynamoDB client
+log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
+
+# Log all HTTP requests made; includes S3 interaction. This may
+# include sensitive information such as account IDs in HTTP headers.
+log4j.logger.com.amazonaws.request=DEBUG
+
+```
+
+If all else fails, S3Guard is designed to allow for easy recovery by deleting
+the metadata store data. In DynamoDB, this can be accomplished by simply
+deleting the table, and allowing S3Guard to recreate it from scratch. Note
+that S3Guard tracks recent changes to file metadata to implement consistency.
+Deleting the metadata store table will simply result in a period of eventual
+consistency for any file modifications that were made right before the table
+was deleted.
+
+### Failure Semantics
+
+Operations which modify metadata will make changes to S3 first. If, and only
+if, those operations succeed, the equivalent changes will be made to the
+Metadata Store.
+
+These changes to S3 and Metadata Store are not fully-transactional: If the S3
+operations succeed, and the subsequent Metadata Store updates fail, the S3
+changes will *not* be rolled back. In this case, an error message will be
+logged.
+
+### Versioning
+
+S3Guard tables are created with a version marker, an entry with the primary
+key and child entry of `../VERSION`; the use of a relative path guarantees
+that it will not be resolved.
+
+#### Versioning policy.
+
+1. The version number of an S3Guard table will only be incremented when
+an incompatible change is made to the table structure — that is, the structure
+has changed so that it is no longer readable by older versions, or because
+it has added new mandatory fields which older versions do not create.
+1. The version number of S3Guard tables will only be changed by incrementing
+the value.
+1. Updated versions of S3Guard MAY continue to support older version tables.
+1. If an incompatible change is made such that existing tables are not compatible,
+then a means shall be provided to update existing tables. For example:
+an option in the Command Line Interface, or an option to upgrade tables
+during S3Guard initialization.
+
+*Note*: this policy does not indicate any intent to upgrade table structures
+in an incompatible manner. The version marker in tables exists to support
+such an option if it ever becomes necessary, by ensuring that all S3Guard
+clients can recognise any version mismatch.
+
+### Security
+
+All users of the DynamoDB table must have write access to it. This
+effectively means they must have write access to the entire object store.
+
+There's not been much testing of using a S3Guard Metadata Store
+with a read-only S3 Bucket. It *should* work, provided all users
+have write access to the DynamoDB table. And, as updates to the Metadata Store
+are only made after successful file creation, deletion and rename, the
+store is *unlikely* to get out of sync; even so, it is still something which
+merits more testing before it can be considered reliable.
+
+### Troubleshooting
+
+#### Error: `S3Guard table lacks version marker.`
+
+The table which was intended to be used as an S3Guard metadata store
+does not have any version marker indicating that it is a S3Guard table.
+
+It may be that this is not an S3Guard table.
+
+* Make sure that this is the correct table name.
+* Delete the table, so it can be rebuilt.
+
+#### Error: `Database table is from an incompatible S3Guard version`
+
+This indicates that the version of S3Guard which created (or possibly updated)
+the database table is from a different version that that expected by the S3A
+client.
+
+This error will also include the expected and actual version numbers.
+
+If the expected version is lower than the actual version, then the version
+of the S3A client library is too old to interact with this S3Guard-managed
+bucket. Upgrade the application/library.
+
+If the expected version is higher than the actual version, then the table
+itself will need upgrading.
+
+#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"`
+
+S3Guard could not find the DynamoDB table for the Metadata Store,
+and it was not configured to create it. Either the table was missing,
+or the configuration is preventing S3Guard from finding the table.
+
+1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct.
+1. If the region for an existing table has been set in
+`fs.s3a.s3guard.ddb.region`, verify that the value is correct
+(see the sketch of both settings after this list).
+1. If the region is not set, verify that the table exists in the same
+region as the bucket being used.
+1. Create the table if necessary.
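+
+For reference, a minimal sketch of the two settings checked above; the table
+name and region values are illustrative placeholders rather than defaults:
+
+```xml
+<!-- Illustrative values only: substitute your own table name and region. -->
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-ddb-table-name</value>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+```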
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index 3b83f1f7731..3b9b5c49122 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -107,6 +107,10 @@ each filesystem for its testing.
1. `test.fs.s3n.name` : the URL of the bucket for S3n tests
1. `test.fs.s3a.name` : the URL of the bucket for S3a tests
+*Note* that running the s3a and s3n tests in parallel mode against the same bucket
+is unreliable. We recommend using separate buckets or testing one connector
+at a time.
+
The contents of each bucket will be destroyed during the test process:
do not use the bucket for any purpose other than testing. Furthermore, for
s3a, all in-progress multi-part uploads to the bucket will be aborted at the
@@ -691,7 +695,7 @@ use requires the presence of secret credentials, where tests may be slow,
and where finding out why something failed from nothing but the test output
is critical.
-#### Subclasses Existing Shared Base Blasses
+#### Subclasses Existing Shared Base Classes
Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable.
These set things up for testing against the object stores, provide good threadnames,
@@ -798,7 +802,7 @@ We really appreciate this — you will too.
### How to keep your credentials really safe
-Although the `auth-keys.xml` file is marged as ignored in git and subversion,
+Although the `auth-keys.xml` file is marked as ignored in git and subversion,
it is still in your source tree, and there's always that risk that it may
creep out.
@@ -813,3 +817,283 @@ using an absolute XInclude reference to it.
```
+
+# Failure Injection
+
+**Warning: do not enable any type of failure injection in production. The
+following settings are for testing only.**
+
+One of the challenges with S3A integration tests is the fact that S3 is an
+eventually-consistent storage system. In practice, we rarely see delays in
+visibility of recently created objects both in listings (`listStatus()`) and
+when getting a single file's metadata (`getFileStatus()`). Since this behavior
+is rare and non-deterministic, thorough integration testing is challenging.
+
+To address this, S3A supports a shim layer on top of the `AmazonS3Client`
+class which artificially delays certain paths from appearing in listings.
+This is implemented in the class `InconsistentAmazonS3Client`.
+
+## Simulating List Inconsistencies
+
+### Enabling the InconsistentAmazonS3Client
+
+There are two ways of enabling the `InconsistentAmazonS3Client`: at
+config-time, or programmatically. For an example of programmatic test usage,
+see `ITestS3GuardListConsistency`.
+
+To enable the fault-injecting client via configuration, switch the
+S3A client to use the "Inconsistent S3 Client Factory" when connecting to
+S3:
+
+```xml
+<property>
+  <name>fs.s3a.s3.client.factory.impl</name>
+  <value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value>
+</property>
+```
+
+The inconsistent client works by:
+
+1. Choosing which objects will be "inconsistent" at the time the object is
+created or deleted.
+2. When `listObjects()` is called, any keys that we have marked as
+inconsistent above will not be returned in the results (until the
+configured delay has elapsed). Similarly, deleted items may be *added* to
+missing results to delay the visibility of the delete.
+
+There are two ways of choosing which keys (filenames) will be affected: By
+substring, and by random probability.
+
+```xml
+<property>
+  <name>fs.s3a.failinject.inconsistency.key.substring</name>
+  <value>DELAY_LISTING_ME</value>
+</property>
+
+<property>
+  <name>fs.s3a.failinject.inconsistency.probability</name>
+  <value>1.0</value>
+</property>
+```
+
+By default, any object which has the substring "DELAY_LISTING_ME" in its key
+will be subject to delayed visibility. For example, the path
+`s3a://my-bucket/test/DELAY_LISTING_ME/file.txt` would match this condition.
+To match all keys use the value "\*" (a single asterisk). This is a special
+value: *We don't support arbitrary wildcards.*
+
+The default probability of delaying an object is 1.0. This means that *all*
+keys that match the substring will get delayed visibility. Note that the two
+conditions are combined with a logical *and*: the substring must match *and*
+the random draw must fall within the configured probability. Here are some
+example configurations:
+
+```
+| substring | probability | behavior                                   |
+|-----------|-------------|--------------------------------------------|
+|           | 0.001       | An empty <value> tag in .xml config will   |
+|           |             | be interpreted as unset and revert to the  |
+|           |             | default value, "DELAY_LISTING_ME"          |
+|           |             |                                            |
+| *         | 0.001       | 1/1000 chance of *any* key being delayed.  |
+|           |             |                                            |
+| delay     | 0.01        | 1/100 chance of any key containing "delay" |
+|           |             |                                            |
+| delay     | 1.0         | All keys containing substring "delay"      |
+```
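+
+For illustration only, the selection rule amounts to something like the
+following sketch. This is not the actual `InconsistentAmazonS3Client` code;
+the helper method and its parameters are hypothetical:
+
+```java
+// Sketch: the combined substring + probability check described above.
+boolean shouldDelay(String key, String substring, float probability,
+    java.util.Random random) {
+  boolean matches = "*".equals(substring) || key.contains(substring);
+  // logical AND: the key must match AND the random draw must succeed
+  return matches && random.nextFloat() < probability;
+}
+```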
+
+You can also configure how long you want the delay in visibility to last.
+The default is 5000 milliseconds (five seconds).
+
+```xml
+<property>
+  <name>fs.s3a.failinject.inconsistency.msec</name>
+  <value>5000</value>
+</property>
+```
+
+Future versions of this client will introduce new failure modes, with
+simulation of S3 throttling exceptions being the next feature under
+development.
+
+### Limitations of Inconsistency Injection
+
+Although `InconsistentAmazonS3Client` can delay the visibility of an object
+or parent directory, it does not prevent the key of that object from
+appearing in all prefix searches. For example, if we create the following
+object with the default configuration above, in an otherwise empty bucket:
+
+```
+s3a://bucket/a/b/c/DELAY_LISTING_ME
+```
+
+Then the following paths will still be visible as directories (ignoring
+possible real-world inconsistencies):
+
+```
+s3a://bucket/a
+s3a://bucket/a/b
+```
+
+Whereas `getFileStatus()` on the following *will* be subject to delayed
+visibility (`FileNotFoundException` until the delay has elapsed):
+
+```
+s3a://bucket/a/b/c
+s3a://bucket/a/b/c/DELAY_LISTING_ME
+```
+
+In real-life S3 inconsistency, however, we expect that all the above paths
+(including `a` and `b`) will be subject to delayed visibility.
+
+### Using the `InconsistentAmazonS3Client` in downstream integration tests
+
+The inconsistent client is shipped in the `hadoop-aws` JAR, so it can
+be used in applications which work with S3 to see how they handle
+inconsistent directory listings.
+
+## Testing S3Guard
+
+The basic strategy for testing S3Guard correctness consists of:
+
+1. MetadataStore Contract tests.
+
+ The MetadataStore contract tests are inspired by the Hadoop FileSystem and
+ `FileContext` contract tests. Each implementation of the `MetadataStore` interface
+ has a test which subclasses the `MetadataStoreTestBase` class and customizes
+ it to initialize that MetadataStore. These tests ensure that the different
+ implementations all satisfy the semantics of the MetadataStore API.
+
+2. Running existing S3A unit and integration tests with S3Guard enabled.
+
+ You can run the S3A integration tests on top of S3Guard by configuring your
+ `MetadataStore` in your
+ `hadoop-tools/hadoop-aws/src/test/resources/core-site.xml` or
+ `hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml` files.
+ Next, run the S3A integration tests as outlined in the *Running the Tests* section
+ of the [S3A documentation](./index.html).
+
+3. Running fault-injection tests that test S3Guard's consistency features.
+
+ The `ITestS3GuardListConsistency` uses failure injection to ensure
+ that list consistency logic is correct even when the underlying storage is
+ eventually consistent.
+
+ The integration test adds a shim above the Amazon S3 Client layer that injects
+ delays in object visibility.
+
+ All of these tests will be run if you follow the steps listed in step 2 above;
+ a simplified sketch of the fault-injection pattern follows this list.
+
+ No charges are incurred for using this store, and its consistency
+ guarantees are those of the underlying object store instance.
+
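+As a simplified illustration of what the fault-injection consistency tests
+do (see `ITestS3AInconsistency` and `ITestS3GuardListConsistency` in this
+patch for the real tests), the pattern is roughly the following; it assumes
+an `S3AFileSystem` already wired up with the inconsistent client as shown
+earlier, and the paths are placeholders:
+
+```java
+// Sketch only: create a key whose listing visibility is delayed, then
+// check that the S3Guard-backed listing still reports it.
+Path dir = new Path("/test/consistency");
+Path delayed = new Path(dir, "file-DELAY_LISTING_ME");
+ContractTestUtils.touch(fs, delayed);       // fs uses the inconsistent client
+FileStatus[] listing = fs.listStatus(dir);  // raw S3 might omit the new key
+if (fs.hasMetadataStore()) {
+  // with S3Guard, the freshly created entry must be present
+  assertEquals(1, listing.length);
+}
+```
+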
+## Testing S3A with S3Guard Enabled
+
+All the S3A tests which work with a private repository can be configured to
+run with S3Guard by using the `s3guard` profile. When set, this will run
+all the tests with the local in-memory metadata store in "non-authoritative" mode.
+
+```bash
+mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard
+```
+
+When the `s3guard` profile is enabled, the following profiles can also be specified:
+
+* `dynamo`: use an AWS-hosted DynamoDB table; creating the table if it does
+ not exist. You will have to pay the bills for DynamoDB web service.
+* `dynamodblocal`: use an in-memory DynamoDBLocal server instead of real AWS
+ DynamoDB web service; this launches the server and creates the table.
+ You won't be charged for DynamoDB usage in tests. As it runs in-JVM,
+ the table isn't shared with other tests running in parallel.
+* `non-auth`: treat the S3Guard metadata as authoritative.
+
+```bash
+mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard -Ddynamo -Dauth
+```
+
+When experimenting with options, it is usually best to run a single test suite
+at a time until the operations appear to be working.
+
+```bash
+mvn -T 1C verify -Dtest=skip -Dit.test=ITestS3AMiscOperations -Ds3guard -Ddynamo
+```
+
+### Notes
+
+1. If the `s3guard` profile is not set, then the S3Guard properties are those
+of the test configuration set in `contract-test-options.xml` or `auth-keys.xml`.
+
+If the `s3guard` profile *is* set,
+1. The S3Guard options from maven (the dynamo and authoritative flags)
+ overwrite any previously set in the configuration files.
+1. DynamoDB will be configured to create any missing tables.
+
+### Warning About Concurrent Tests
+
+You must not run S3A and S3N tests in parallel on the same bucket. This is
+especially true when S3Guard is enabled. S3Guard requires that all clients
+that are modifying the bucket have S3Guard enabled, so having S3N
+integration tests running in parallel with S3A tests will cause strange
+failures.
+
+### Scale Testing MetadataStore Directly
+
+There are some scale tests that exercise Metadata Store implementations
+directly. These ensure that S3Guard is robust to things like DynamoDB
+throttling, and compare performance for different implementations. These
+are included in the scale tests executed when `-Dscale` is passed to
+the maven command line.
+
+The two S3Guard scale tests are `ITestDynamoDBMetadataStoreScale` and
+`ITestLocalMetadataStoreScale`. To run the DynamoDB test, you will need to
+define your table name and region in your test configuration. For example,
+the following settings allow us to run `ITestDynamoDBMetadataStoreScale` with
+artificially low read and write capacity provisioned, so we can judge the
+effects of being throttled by the DynamoDB service:
+
+```xml
+<property>
+  <name>scale.test.operation.count</name>
+  <value>10</value>
+</property>
+<property>
+  <name>scale.test.directory.count</name>
+  <value>3</value>
+</property>
+<property>
+  <name>fs.s3a.scale.test.enabled</name>
+  <value>true</value>
+</property>
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-scale-test</value>
+</property>
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+<property>
+  <name>fs.s3a.s3guard.ddb.table.create</name>
+  <value>true</value>
+</property>
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+  <value>10</value>
+</property>
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+  <value>10</value>
+</property>
+```
+
+### Testing only: Local Metadata Store
+
+There is an in-memory Metadata Store for testing.
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore</value>
+</property>
+```
+
+This is not for use in production.
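+
+The same store can be selected programmatically in a test. A minimal sketch,
+assuming the `S3_METADATA_STORE_IMPL` constant that the unit tests in this
+patch also use:
+
+```java
+// Sketch: point a test Configuration at the in-memory metadata store.
+Configuration conf = new Configuration();
+conf.setClass(S3_METADATA_STORE_IMPL,
+    LocalMetadataStore.class, MetadataStore.class);
+```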
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
index d2a858f615e..fd9497ba3ff 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java
@@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* S3A contract tests creating files.
*/
public class ITestS3AContractCreate extends AbstractContractCreateTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDelete.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDelete.java
index a47dcaef61e..95ea410fa66 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDelete.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDelete.java
@@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* S3A contract tests covering deletes.
*/
public class ITestS3AContractDelete extends AbstractContractDeleteTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java
index 50ce0c2a98f..587dbbc1df1 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.fs.contract.s3a;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
@@ -38,12 +39,18 @@ public class ITestS3AContractDistCp extends AbstractContractDistCpTest {
return SCALE_TEST_TIMEOUT_MILLIS;
}
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
@Override
protected Configuration createConfiguration() {
Configuration newConf = super.createConfiguration();
newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
newConf.setBoolean(FAST_UPLOAD, true);
newConf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
+ // patch in S3Guard options
+ maybeEnableS3Guard(newConf);
return newConf;
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java
index c7ed5a3df3d..cb9819cf9a4 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* S3A contract tests covering getFileStatus.
*/
@@ -46,6 +48,8 @@ public class ITestS3AContractGetFileStatus
S3ATestUtils.disableFilesystemCaching(conf);
// aggressively low page size forces tests to go multipage
conf.setInt(Constants.MAX_PAGING_KEYS, 2);
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
return conf;
}
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
index d953e7eb6ae..dba52e128d4 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
@@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* Test dir operations on S3A.
*/
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractOpen.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractOpen.java
index a7bdc0d3f1b..8e338b71d11 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractOpen.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractOpen.java
@@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* S3A contract tests opening files.
*/
public class ITestS3AContractOpen extends AbstractContractOpenTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRename.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRename.java
index 5dba03d664c..433964998cd 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRename.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRename.java
@@ -26,12 +26,25 @@ import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering rename.
*/
public class ITestS3AContractRename extends AbstractContractRenameTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java
index 8383a77365b..5c2e2cdf367 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* root dir operations against an S3 bucket.
*/
@@ -37,6 +39,18 @@ public class ITestS3AContractRootDir extends
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AContractRootDir.class);
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractSeek.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractSeek.java
index 1572fbc880b..379ace8ffec 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractSeek.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractSeek.java
@@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
+
/**
* S3A contract tests covering file seek.
*/
public class ITestS3AContractSeek extends AbstractContractSeekTest {
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
index 6734947af96..0c7f7df22b9 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
@@ -26,6 +26,8 @@ import com.amazonaws.services.s3.AmazonS3;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import org.junit.After;
import org.junit.Before;
@@ -33,7 +35,8 @@ import org.junit.Rule;
import org.junit.rules.ExpectedException;
/**
- * Abstract base class for S3A unit tests using a mock S3 client.
+ * Abstract base class for S3A unit tests using a mock S3 client and a null
+ * metadata store.
*/
public abstract class AbstractS3AMockTest {
@@ -55,6 +58,10 @@ public abstract class AbstractS3AMockTest {
Configuration conf = new Configuration();
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
S3ClientFactory.class);
+ // We explicitly disable the MetadataStore even if one is configured. Unit
+ // tests must not issue requests to the AWS DynamoDB service.
+ conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+ MetadataStore.class);
fs = new S3AFileSystem();
URI uri = URI.create(FS_S3A + "://" + BUCKET);
fs.initialize(uri, conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
index c19b72cde9f..f0c389db469 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java
@@ -33,6 +33,7 @@ import java.io.IOException;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* An extension of the contract test base set up for S3A tests.
@@ -65,6 +66,18 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
return S3A_TEST_TIMEOUT;
}
+ /**
+ * Create a configuration, possibly patching in S3Guard options.
+ * @return a configuration
+ */
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
+ return conf;
+ }
+
protected Configuration getConfiguration() {
return getContract().getConf();
}
@@ -99,10 +112,21 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
*/
protected Path writeThenReadFile(String name, int len) throws IOException {
Path path = path(name);
+ writeThenReadFile(path, len);
+ return path;
+ }
+
+ /**
+ * Write a file, read it back, validate the dataset. Overwrites the file
+ * if it is present.
+ * @param path path to file
+ * @param len length of file
+ * @throws IOException any IO problem
+ */
+ protected void writeThenReadFile(Path path, int len) throws IOException {
byte[] data = dataset(len, 'a', 'z');
writeDataset(getFileSystem(), path, data, data.length, 1024 * 1024, true);
ContractTestUtils.verifyFileContents(getFileSystem(), path, data);
- return path;
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java
index 22c4f7ee41f..660123379f4 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java
@@ -140,6 +140,10 @@ public class ITestS3AAWSCredentialsProvider {
createFailingFS(conf);
} catch (AccessDeniedException e) {
// expected
+ } catch (AWSServiceIOException e) {
+ GenericTestUtils.assertExceptionContains(
+ "UnrecognizedClientException", e);
+ // expected
}
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index dd75cb60745..b9fe0fdbc74 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -25,6 +25,7 @@ import com.amazonaws.services.s3.S3ClientOptions;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.reflect.FieldUtils;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
@@ -483,7 +484,7 @@ public class ITestS3AConfiguration {
}
});
assertEquals("username", alice, fs.getUsername());
- S3AFileStatus status = fs.getFileStatus(new Path("/"));
+ FileStatus status = fs.getFileStatus(new Path("/"));
assertEquals("owner in " + status, alice, status.getOwner());
assertEquals("group in " + status, alice, status.getGroup());
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java
index 71776acc86c..7dc286d431d 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java
@@ -29,6 +29,7 @@ import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
@@ -63,7 +64,7 @@ public class ITestS3ACopyFromLocalFile extends AbstractS3ATestBase {
Path dest = upload(file, true);
assertPathExists("uploaded file not found", dest);
S3AFileSystem fs = getFileSystem();
- S3AFileStatus status = fs.getFileStatus(dest);
+ FileStatus status = fs.getFileStatus(dest);
assertEquals("File length of " + status,
message.getBytes(ASCII).length, status.getLen());
assertFileTextEquals(dest, message);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java
index b3d7abfff32..95d44cca610 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
@@ -37,6 +38,7 @@ import java.net.URLEncoder;
import java.nio.file.AccessDeniedException;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3GuardState;
/**
* Tests that credentials can go into the URL. This includes a valid
@@ -63,6 +65,11 @@ public class ITestS3ACredentialsInURL extends Assert {
public void testInstantiateFromURL() throws Throwable {
Configuration conf = new Configuration();
+
+ // Skip in the case of S3Guard with DynamoDB because it cannot get
+ // credentials for its own use if they're only in S3 URLs
+ assumeS3GuardState(false, conf);
+
String accessKey = conf.get(Constants.ACCESS_KEY);
String secretKey = conf.get(Constants.SECRET_KEY);
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
@@ -84,6 +91,7 @@ public class ITestS3ACredentialsInURL extends Assert {
conf.unset(Constants.ACCESS_KEY);
conf.unset(Constants.SECRET_KEY);
fs = S3ATestUtils.createTestFileSystem(conf);
+
String fsURI = fs.getUri().toString();
assertFalse("FS URI contains a @ symbol", fsURI.contains("@"));
assertFalse("FS URI contains a % symbol", fsURI.contains("%"));
@@ -119,13 +127,14 @@ public class ITestS3ACredentialsInURL extends Assert {
Configuration conf = new Configuration();
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
Assume.assumeNotNull(fsname);
+ assumeS3GuardState(false, conf);
URI original = new URI(fsname);
URI testURI = createUriWithEmbeddedSecrets(original, "user", "//");
conf.set(TEST_FS_S3A_NAME, testURI.toString());
- fs = S3ATestUtils.createTestFileSystem(conf);
try {
- S3AFileStatus status = fs.getFileStatus(new Path("/"));
+ fs = S3ATestUtils.createTestFileSystem(conf);
+ FileStatus status = fs.getFileStatus(new Path("/"));
fail("Expected an AccessDeniedException, got " + status);
} catch (AccessDeniedException e) {
// expected
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java
new file mode 100644
index 00000000000..7abd4749764
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.util.concurrent.Callable;
+
+/**
+ * Tests behavior of a FileNotFound error that happens after open(), i.e. on
+ * the first read.
+ */
+public class ITestS3ADelayedFNF extends AbstractS3ATestBase {
+
+
+ /**
+ * See debugging documentation
+ * here.
+ * @throws Exception
+ */
+ @Test
+ public void testNotFoundFirstRead() throws Exception {
+ FileSystem fs = getFileSystem();
+ Path p = path("some-file");
+ ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22});
+
+ final FSDataInputStream in = fs.open(p);
+ assertDeleted(p, false);
+
+ // This should fail since we deleted after the open.
+ LambdaTestUtils.intercept(FileNotFoundException.class,
+ new Callable<Integer>() {
+ @Override
+ public Integer call() throws Exception {
+ return in.read();
+ }
+ });
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEmptyDirectory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEmptyDirectory.java
new file mode 100644
index 00000000000..c55be5b6baf
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEmptyDirectory.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Tests which exercise treatment of empty/non-empty directories.
+ */
+public class ITestS3AEmptyDirectory extends AbstractS3ATestBase {
+
+ @Test
+ public void testDirectoryBecomesEmpty() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+
+ // 1. set up non-empty dir
+ Path dir = path("testEmptyDir");
+ Path child = path("testEmptyDir/dir2");
+ mkdirs(child);
+
+ S3AFileStatus status = getS3AFileStatus(fs, dir);
+ assertEmptyDirectory(false, status);
+
+ // 2. Make testEmptyDir empty
+ assertDeleted(child, false);
+ status = getS3AFileStatus(fs, dir);
+
+ assertEmptyDirectory(true, status);
+ }
+
+ private static void assertEmptyDirectory(boolean isEmpty, S3AFileStatus s) {
+ String msg = "dir is empty";
+ // Should *not* be Tristate.UNKNOWN since we request a definitive value
+ // in getS3AFileStatus() below
+ Tristate expected = Tristate.fromBool(isEmpty);
+ assertEquals(msg, expected, s.isEmptyDirectory());
+ }
+
+ @Test
+ public void testDirectoryBecomesNonEmpty() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+
+ // 1. create empty dir
+ Path dir = path("testEmptyDir");
+ mkdirs(dir);
+
+ S3AFileStatus status = getS3AFileStatus(fs, dir);
+ assertEmptyDirectory(true, status);
+
+ // 2. Make testEmptyDir non-empty
+
+ ContractTestUtils.touch(fs, path("testEmptyDir/file1"));
+ status = getS3AFileStatus(fs, dir);
+
+ assertEmptyDirectory(false, status);
+ }
+
+ private S3AFileStatus getS3AFileStatus(S3AFileSystem fs, Path p) throws
+ IOException {
+ return fs.innerGetFileStatus(p, true /* want isEmptyDirectory value */);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
index 91be8b9501e..8b7e0311c9e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java
@@ -18,19 +18,21 @@
package org.apache.hadoop.fs.s3a;
-import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
-import static org.apache.hadoop.fs.contract.ContractTestUtils.rm;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfEncryptionTestsDisabled;
-import static org.apache.hadoop.test.LambdaTestUtils.intercept;
-
import java.io.IOException;
+import java.nio.file.AccessDeniedException;
+
+import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
-import org.junit.Test;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Concrete class that extends {@link AbstractTestS3AEncryption}
@@ -38,17 +40,39 @@ import org.junit.Test;
*/
public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
+ private static final String SERVICE_AMAZON_S3_STATUS_CODE_403
+ = "Service: Amazon S3; Status Code: 403;";
+ private static final String KEY_1
+ = "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=";
+ private static final String KEY_2
+ = "G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=";
+ private static final String KEY_3
+ = "NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=";
+ private static final String KEY_4
+ = "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=";
+ private static final int TEST_FILE_LEN = 2048;
+
+ /**
+ * Filesystem created with a different key.
+ */
+ private FileSystem fsKeyB;
+
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
- S3ATestUtils.disableFilesystemCaching(conf);
+ disableFilesystemCaching(conf);
conf.set(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM,
getSSEAlgorithm().getMethod());
- conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY,
- "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=");
+ conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, KEY_1);
return conf;
}
+ @Override
+ public void teardown() throws Exception {
+ super.teardown();
+ IOUtils.closeStream(fsKeyB);
+ }
+
/**
* This will create and write to a file using encryption key A, then attempt
* to read from it again with encryption key B. This will not work as it
@@ -64,26 +88,25 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
- final Path[] path = new Path[1];
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ int len = TEST_FILE_LEN;
+ describe("Create an encrypted file of size " + len);
+ Path src = path("testCreateFileAndReadWithDifferentEncryptionKey");
+ writeThenReadFile(src, len);
- int len = 2048;
- describe("Create an encrypted file of size " + len);
- String src = createFilename(len);
- path[0] = writeThenReadFile(src, len);
-
- //extract the test FS
- FileSystem fileSystem = createNewFileSystemWithSSECKey(
- "kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
- byte[] data = dataset(len, 'a', 'z');
- ContractTestUtils.verifyFileContents(fileSystem, path[0], data);
- throw new Exception("Fail");
- });
+ //extract the test FS
+ fsKeyB = createNewFileSystemWithSSECKey(
+ "kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
+ byte[] data = dataset(len, 'a', 'z');
+ ContractTestUtils.verifyFileContents(fsKeyB, src, data);
+ return fsKeyB.getFileStatus(src);
+ });
}
/**
- * While each object has it's own key and should be distinct, this verifies
+ * While each object has its own key and should be distinct, this verifies
* that hadoop treats object keys as a filesystem path. So if a top level
* dir is encrypted with keyA, a sublevel dir cannot be accessed with a
* different keyB.
@@ -96,25 +119,20 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testCreateSubdirWithDifferentKey() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
+ assumeS3GuardState(false, getConfiguration());
- final Path[] path = new Path[1];
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
-
- path[0] = S3ATestUtils.createTestPath(
- new Path(createFilename("dir/"))
- );
- Path nestedDirectory = S3ATestUtils.createTestPath(
- new Path(createFilename("dir/nestedDir/"))
- );
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=");
- getFileSystem().mkdirs(path[0]);
- fsKeyB.mkdirs(nestedDirectory);
-
- throw new Exception("Exception should be thrown.");
- });
- rm(getFileSystem(), path[0], true, false);
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ Path base = path("testCreateSubdirWithDifferentKey");
+ Path nestedDirectory = new Path(base, "nestedDir");
+ fsKeyB = createNewFileSystemWithSSECKey(
+ KEY_2);
+ getFileSystem().mkdirs(base);
+ fsKeyB.mkdirs(nestedDirectory);
+ // expected to fail
+ return fsKeyB.getFileStatus(nestedDirectory);
+ });
}
/**
@@ -130,20 +148,17 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
- final Path[] path = new Path[1];
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
-
- int len = 2048;
- String src = createFilename(len);
- path[0] = writeThenReadFile(src, len);
-
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=");
- fsKeyB.rename(path[0], new Path(createFilename("different-path.txt")));
-
- throw new Exception("Exception should be thrown.");
- });
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ int len = TEST_FILE_LEN;
+ Path src = path(createFilename(len));
+ writeThenReadFile(src, len);
+ fsKeyB = createNewFileSystemWithSSECKey(KEY_3);
+ Path dest = path(createFilename("different-path.txt"));
+ getFileSystem().mkdirs(dest.getParent());
+ return fsKeyB.rename(src, dest);
+ });
}
/**
@@ -157,11 +172,11 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
- String src = createFilename("original-path.txt");
- Path path = writeThenReadFile(src, 2048);
- Path newPath = path(createFilename("different-path.txt"));
- getFileSystem().rename(path, newPath);
- byte[] data = dataset(2048, 'a', 'z');
+ Path src = path("original-path.txt");
+ writeThenReadFile(src, TEST_FILE_LEN);
+ Path newPath = path("different-path.txt");
+ getFileSystem().rename(src, newPath);
+ byte[] data = dataset(TEST_FILE_LEN, 'a', 'z');
ContractTestUtils.verifyFileContents(getFileSystem(), newPath, data);
}
@@ -175,30 +190,26 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListEncryptedDir() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
+ assumeS3GuardState(false, getConfiguration());
- Path nestedDirectory = S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- );
+ Path pathABC = path("testListEncryptedDir/a/b/c/");
+ Path pathAB = pathABC.getParent();
+ Path pathA = pathAB.getParent();
+
+ Path nestedDirectory = createTestPath(pathABC);
assertTrue(getFileSystem().mkdirs(nestedDirectory));
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+ fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
- fsKeyB.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/"))
- ), true);
- fsKeyB.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/"))
- ), true);
+ fsKeyB.listFiles(pathA, true);
+ fsKeyB.listFiles(pathAB, true);
//Until this point, no exception is thrown about access
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
- fsKeyB.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- ), false);
- throw new Exception("Exception should be thrown.");
- });
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ fsKeyB.listFiles(pathABC, false);
+ });
Configuration conf = this.createConfiguration();
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
@@ -209,22 +220,13 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
FileSystem unencryptedFileSystem = contract.getTestFileSystem();
//unencrypted can access until the final directory
- unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/"))
- ), true);
- unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/"))
- ), true);
- intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
- "Bad Request (Service: Amazon S3; Status Code: 400; Error" +
- " Code: 400 Bad Request;", () -> {
-
- unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- ), false);
- throw new Exception("Exception should be thrown.");
- });
- rm(getFileSystem(), path(createFilename("/")), true, false);
+ unencryptedFileSystem.listFiles(pathA, true);
+ unencryptedFileSystem.listFiles(pathAB, true);
+ AWSS3IOException ex = intercept(AWSS3IOException.class,
+ () -> {
+ unencryptedFileSystem.listFiles(pathABC, false);
+ });
+ assertStatusCode(ex, 400);
}
/**
@@ -236,31 +238,27 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListStatusEncryptedDir() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
+ assumeS3GuardState(false, getConfiguration());
- Path nestedDirectory = S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- );
- assertTrue(getFileSystem().mkdirs(nestedDirectory));
+ Path pathABC = path("testListStatusEncryptedDir/a/b/c/");
+ Path pathAB = pathABC.getParent();
+ Path pathA = pathAB.getParent();
+ assertTrue(getFileSystem().mkdirs(pathABC));
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+ fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
- fsKeyB.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/"))));
- fsKeyB.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/"))));
+ fsKeyB.listStatus(pathA);
+ fsKeyB.listStatus(pathAB);
//Until this point, no exception is thrown about access
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
- fsKeyB.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))));
-
- throw new Exception("Exception should be thrown.");
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ fsKeyB.listStatus(pathABC);
});
//Now try it with an unencrypted filesystem.
- Configuration conf = this.createConfiguration();
+ Configuration conf = createConfiguration();
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_KEY);
@@ -269,20 +267,14 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
FileSystem unencryptedFileSystem = contract.getTestFileSystem();
//unencrypted can access until the final directory
- unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/"))));
- unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/"))));
+ unencryptedFileSystem.listStatus(pathA);
+ unencryptedFileSystem.listStatus(pathAB);
- intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
- "Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400" +
- " Bad Request;", () -> {
-
- unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))));
- throw new Exception("Exception should be thrown.");
+ AWSS3IOException ex = intercept(AWSS3IOException.class,
+ () -> {
+ unencryptedFileSystem.listStatus(pathABC);
});
- rm(getFileSystem(), path(createFilename("/")), true, false);
+ assertStatusCode(ex, 400);
}
/**
@@ -294,31 +286,24 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListStatusEncryptedFile() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
+ assumeS3GuardState(false, getConfiguration());
+ Path pathABC = path("testListStatusEncryptedFile/a/b/c/");
+ assertTrue(getFileSystem().mkdirs(pathABC));
- Path nestedDirectory = S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- );
- assertTrue(getFileSystem().mkdirs(nestedDirectory));
+ Path fileToStat = new Path(pathABC, "fileToStat.txt");
+ writeThenReadFile(fileToStat, TEST_FILE_LEN);
- String src = createFilename("/a/b/c/fileToStat.txt");
- Path fileToStat = writeThenReadFile(src, 2048);
-
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+ fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
//Until this point, no exception is thrown about access
- intercept(java.nio.file.AccessDeniedException.class,
- "Service: Amazon S3; Status Code: 403;", () -> {
- fsKeyB.listStatus(S3ATestUtils.createTestPath(fileToStat));
-
- throw new Exception("Exception should be thrown.");
- });
- rm(getFileSystem(), path(createFilename("/")), true, false);
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ fsKeyB.listStatus(fileToStat);
+ });
}
-
-
/**
* It is possible to delete directories without the proper encryption key and
* the hierarchy above it.
@@ -329,31 +314,26 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testDeleteEncryptedObjectWithDifferentKey() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
+ assumeS3GuardState(false, getConfiguration());
+ Path pathABC = path("testDeleteEncryptedObjectWithDifferentKey/a/b/c/");
- Path nestedDirectory = S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))
- );
- assertTrue(getFileSystem().mkdirs(nestedDirectory));
- String src = createFilename("/a/b/c/filetobedeleted.txt");
- Path fileToDelete = writeThenReadFile(src, 2048);
-
- FileSystem fsKeyB = createNewFileSystemWithSSECKey(
- "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
- intercept(java.nio.file.AccessDeniedException.class,
- "Forbidden (Service: Amazon S3; Status Code: 403; Error Code: " +
- "403 Forbidden", () -> {
-
- fsKeyB.delete(fileToDelete, false);
- throw new Exception("Exception should be thrown.");
- });
+ Path pathAB = pathABC.getParent();
+ Path pathA = pathAB.getParent();
+ assertTrue(getFileSystem().mkdirs(pathABC));
+ Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
+ writeThenReadFile(fileToDelete, TEST_FILE_LEN);
+ fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
+ intercept(AccessDeniedException.class,
+ SERVICE_AMAZON_S3_STATUS_CODE_403,
+ () -> {
+ fsKeyB.delete(fileToDelete, false);
+ });
//This is possible
- fsKeyB.delete(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/c/"))), true);
- fsKeyB.delete(S3ATestUtils.createTestPath(
- path(createFilename("/a/b/"))), true);
- fsKeyB.delete(S3ATestUtils.createTestPath(
- path(createFilename("/a/"))), true);
+ fsKeyB.delete(pathABC, true);
+ fsKeyB.delete(pathAB, true);
+ fsKeyB.delete(pathA, true);
+ assertPathDoesNotExist("expected recursive delete", fileToDelete);
}
private FileSystem createNewFileSystemWithSSECKey(String sseCKey) throws
@@ -371,4 +351,5 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
protected S3AEncryptionMethods getSSEAlgorithm() {
return S3AEncryptionMethods.SSE_C;
}
+
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index 00171f06476..3e293f75763 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs.s3a;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
@@ -32,8 +33,8 @@ import java.net.URI;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff;
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
+import static org.junit.Assume.assumeFalse;
/**
* Use metrics to assert about the cost of file status queries.
@@ -62,9 +63,11 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
S3AFileSystem fs = getFileSystem();
touch(fs, simpleFile);
resetMetricDiffs();
- S3AFileStatus status = fs.getFileStatus(simpleFile);
+ FileStatus status = fs.getFileStatus(simpleFile);
assertTrue("not a file: " + status, status.isFile());
- metadataRequests.assertDiffEquals(1);
+ if (!fs.hasMetadataStore()) {
+ metadataRequests.assertDiffEquals(1);
+ }
listRequests.assertDiffEquals(0);
}
@@ -79,9 +82,13 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path dir = path("empty");
fs.mkdirs(dir);
resetMetricDiffs();
- S3AFileStatus status = fs.getFileStatus(dir);
- assertTrue("not empty: " + status, status.isEmptyDirectory());
- metadataRequests.assertDiffEquals(2);
+ S3AFileStatus status = fs.innerGetFileStatus(dir, true);
+ assertTrue("not empty: " + status,
+ status.isEmptyDirectory() == Tristate.TRUE);
+
+ if (!fs.hasMetadataStore()) {
+ metadataRequests.assertDiffEquals(2);
+ }
listRequests.assertDiffEquals(0);
}
@@ -92,7 +99,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path path = path("missing");
resetMetricDiffs();
try {
- S3AFileStatus status = fs.getFileStatus(path);
+ FileStatus status = fs.getFileStatus(path);
fail("Got a status back from a missing file path " + status);
} catch (FileNotFoundException expected) {
// expected
@@ -108,7 +115,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path path = path("missingdir/missingpath");
resetMetricDiffs();
try {
- S3AFileStatus status = fs.getFileStatus(path);
+ FileStatus status = fs.getFileStatus(path);
fail("Got a status back from a missing file path " + status);
} catch (FileNotFoundException expected) {
// expected
@@ -126,16 +133,18 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path simpleFile = new Path(dir, "simple.txt");
touch(fs, simpleFile);
resetMetricDiffs();
- S3AFileStatus status = fs.getFileStatus(dir);
- if (status.isEmptyDirectory()) {
+ S3AFileStatus status = fs.innerGetFileStatus(dir, true);
+ if (status.isEmptyDirectory() == Tristate.TRUE) {
// erroneous state
String fsState = fs.toString();
fail("FileStatus says directory isempty: " + status
+ "\n" + ContractTestUtils.ls(fs, dir)
+ "\n" + fsState);
}
- metadataRequests.assertDiffEquals(2);
- listRequests.assertDiffEquals(1);
+ if (!fs.hasMetadataStore()) {
+ metadataRequests.assertDiffEquals(2);
+ listRequests.assertDiffEquals(1);
+ }
}
@Test
@@ -187,6 +196,13 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
+
+ // As this test uses the s3 metrics to count the number of fake directory
+ // operations, it depends on side effects happening internally. With
+ // metadata store enabled, it is brittle to changes in that behavior, so we
+ // skip it when a metadata store is in use.
+ assumeFalse(fs.hasMetadataStore());
+
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java
index 1b49d079b6a..27af23aa0cc 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystemContractBaseTest;
import org.apache.hadoop.fs.Path;
+
import static org.junit.Assume.*;
import static org.junit.Assert.*;
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java
new file mode 100644
index 00000000000..eb4f70bf7c8
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.util.concurrent.Callable;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
+
+/**
+ * Tests S3A behavior under forced inconsistency via {@link
+ * InconsistentAmazonS3Client}.
+ *
+ * These tests are for validating expected behavior *without* S3Guard, but
+ * may also run with S3Guard enabled. For tests that validate S3Guard's
+ * consistency features, see {@link ITestS3GuardListConsistency}.
+ */
+public class ITestS3AInconsistency extends AbstractS3ATestBase {
+
+ @Override
+ protected AbstractFSContract createContract(Configuration conf) {
+ conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
+ S3ClientFactory.class);
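+ // Fault injection: any key containing the delay marker substring gets
+ // delayed visibility in listObjects() results, with probability 1.0.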
+ conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
+ conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
+ conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
+ return new S3AContract(conf);
+ }
+
+ @Test
+ public void testGetFileStatus() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+
+ // 1. Make sure no ancestor dirs exist
+ Path dir = path("ancestor");
+ fs.delete(dir, true);
+ waitUntilDeleted(dir);
+
+ // 2. Create a descendant file, which implicitly creates ancestors
+ // This file has delayed visibility.
+ touch(getFileSystem(),
+ path("ancestor/file-" + DEFAULT_DELAY_KEY_SUBSTRING));
+
+ // 3. Assert expected behavior. If S3Guard is enabled, we should be able
+ // to get status for ancestor. If S3Guard is *not* enabled, S3A will
+ // fail to infer the existence of the ancestor since visibility of the
+ // child file is delayed, and its key prefix search will return nothing.
+ try {
+ FileStatus status = fs.getFileStatus(dir);
+ if (fs.hasMetadataStore()) {
+ assertTrue("Ancestor is dir", status.isDirectory());
+ } else {
+ fail("getFileStatus should fail due to delayed visibility.");
+ }
+ } catch (FileNotFoundException e) {
+ if (fs.hasMetadataStore()) {
+ fail("S3Guard failed to list parent of inconsistent child.");
+ }
+ LOG.info("File not found, as expected.");
+ }
+ }
+
+ private void waitUntilDeleted(final Path p) throws Exception {
+ LambdaTestUtils.eventually(30 * 1000, 1000,
+ new Callable<Void>() {
+ @Override
+ public Void call() throws Exception {
+ assertPathDoesNotExist("Dir should be deleted", p);
+ return null;
+ }
+ }
+ );
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java
index 59fcb05729c..869d64c336e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java
@@ -22,10 +22,17 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.PutObjectResult;
import org.junit.Test;
+import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.util.concurrent.Callable;
/**
* Tests of the S3A FileSystem which don't have a specific home and can share
@@ -55,6 +62,26 @@ public class ITestS3AMiscOperations extends AbstractS3ATestBase {
createNonRecursive(new Path(parent, "fail"));
}
+ @Test
+ public void testPutObjectDirect() throws Throwable {
+ final S3AFileSystem fs = getFileSystem();
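+ // A PUT with an unknown/negative content length is expected to be
+ // rejected before any object lands in the bucket, so the path must not
+ // exist afterwards.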
+ ObjectMetadata metadata = fs.newObjectMetadata(-1);
+ metadata.setContentLength(-1);
+ Path path = path("putDirect");
+ final PutObjectRequest put = new PutObjectRequest(fs.getBucket(),
+ path.toUri().getPath(),
+ new ByteArrayInputStream("PUT".getBytes()),
+ metadata);
+ LambdaTestUtils.intercept(IllegalStateException.class,
+ new Callable<PutObjectResult>() {
+ @Override
+ public PutObjectResult call() throws Exception {
+ return fs.putObjectDirect(put);
+ }
+ });
+ assertPathDoesNotExist("put object was created", path);
+ }
+
private FSDataOutputStream createNonRecursive(Path path) throws IOException {
return getFileSystem().createNonRecursive(path, false, 4096,
(short) 3, (short) 4096,
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java
new file mode 100644
index 00000000000..dcc2538ec61
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.junit.Assume;
+import org.junit.Test;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+
+/**
+ * Home for testing the creation of new files and directories with S3Guard
+ * enabled.
+ */
+public class ITestS3GuardCreate extends AbstractS3ATestBase {
+
+ /**
+ * Test that ancestor creation during S3AFileSystem#create() is properly
+ * accounted for in the MetadataStore. This should be handled by the
+ * FileSystem, and be an FS contract test, but S3A does not handle ancestors on
+ * create(), so we need to take care in the S3Guard code to do the right
+ * thing. This may change: See HADOOP-13221 for more detail.
+ */
+ @Test
+ public void testCreatePopulatesFileAncestors() throws Exception {
+ final S3AFileSystem fs = getFileSystem();
+ Assume.assumeTrue(fs.hasMetadataStore());
+ final MetadataStore ms = fs.getMetadataStore();
+ final Path parent = path("testCreatePopulatesFileAncestors");
+
+ try {
+ fs.mkdirs(parent);
+ final Path nestedFile = new Path(parent, "dir1/dir2/file4");
+ touch(fs, nestedFile);
+
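+ // dir1 and dir2 were only created implicitly by the touch(); the
+ // MetadataStore must still treat the parent listing as non-empty,
+ // i.e. it must not claim an authoritative empty listing.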
+ DirListingMetadata list = ms.listChildren(parent);
+ assertFalse("MetadataStore falsely reports authoritative empty list",
+ list.isEmpty() == Tristate.TRUE);
+ } finally {
+ fs.delete(parent, true);
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
new file mode 100644
index 00000000000..fb6e3701d32
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
+import org.junit.Assume;
+import org.junit.Test;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+
+/**
+ * Test logic around whether or not a directory is empty, with S3Guard enabled.
+ * The fact that S3AFileStatus has an isEmptyDirectory flag makes caching
+ * S3AFileStatus instances tricky, as the flag can change as a side effect of
+ * changes to other paths.
+ * After S3Guard is merged to trunk, we should try to remove the
+ * isEmptyDirectory flag from S3AFileStatus, or maintain it outside
+ * of the MetadataStore.
+ */
+public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
+
+ @Test
+ public void testEmptyDirs() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+ Assume.assumeTrue(fs.hasMetadataStore());
+ MetadataStore configuredMs = fs.getMetadataStore();
+ Path existingDir = path("existing-dir");
+ Path existingFile = path("existing-dir/existing-file");
+ try {
+ // 1. Simulate files already existing in the bucket before we started our
+ // cluster. Temporarily disable the MetadataStore so it doesn't witness
+ // us creating these files.
+
+ fs.setMetadataStore(new NullMetadataStore());
+ assertTrue(fs.mkdirs(existingDir));
+ touch(fs, existingFile);
+
+
+ // 2. Simulate (from MetadataStore's perspective) starting our cluster and
+ // creating a file in an existing directory.
+ fs.setMetadataStore(configuredMs); // "start cluster"
+ Path newFile = path("existing-dir/new-file");
+ touch(fs, newFile);
+
+ S3AFileStatus status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should not be empty dir", Tristate.FALSE,
+ status.isEmptyDirectory());
+
+ // 3. Assert that removing the only file the MetadataStore witnessed
+ // being created doesn't cause it to think the directory is now empty.
+ fs.delete(newFile, false);
+ status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should not be empty dir", Tristate.FALSE,
+ status.isEmptyDirectory());
+
+ // 4. Assert that removing the final file, that existed "before"
+ // MetadataStore started, *does* cause the directory to be marked empty.
+ fs.delete(existingFile, false);
+ status = fs.innerGetFileStatus(existingDir, true);
+ assertEquals("Should be empty dir now", Tristate.TRUE,
+ status.isEmptyDirectory());
+ } finally {
+ configuredMs.forgetMetadata(existingFile);
+ configuredMs.forgetMetadata(existingDir);
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
new file mode 100644
index 00000000000..6cff5339244
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java
@@ -0,0 +1,544 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import com.amazonaws.services.s3.model.ObjectListing;
+import com.amazonaws.services.s3.AmazonS3;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
+
+/**
+ * Test S3Guard list consistency feature by injecting delayed listObjects()
+ * visibility via {@link InconsistentAmazonS3Client}.
+ *
+ * Tests here generally:
+ * 1. Use the inconsistency injection mentioned above.
+ * 2. Only run when S3Guard is enabled.
+ */
+public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
+
+ @Override
+ protected AbstractFSContract createContract(Configuration conf) {
+ conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
+ S3ClientFactory.class);
+ // Other configs would break test assumptions
+ conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
+ conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
+ conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
+ return new S3AContract(conf);
+ }
+
+ /**
+ * Helper function for other test cases: does a single rename operation and
+ * validates the aftermath.
+ * @param mkdirs Directories to create
+ * @param srcdirs Source paths for rename operation
+ * @param dstdirs Destination paths for rename operation
+ * @param yesdirs Paths that must exist post-rename (e.g. dstdirs children)
+ * @param nodirs Paths that must not exist post-rename (e.g. srcdirs children)
+ * @throws Exception
+ */
+ private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
+ Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
+ S3AFileSystem fs = getFileSystem();
+ Assume.assumeTrue(fs.hasMetadataStore());
+
+ if (mkdirs != null) {
+ for (Path mkdir : mkdirs) {
+ assertTrue(fs.mkdirs(mkdir));
+ }
+ clearInconsistency(fs);
+ }
+
+ assertTrue("srcdirs and dstdirs must have equal length",
+ srcdirs.length == dstdirs.length);
+ for (int i = 0; i < srcdirs.length; i++) {
+ assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
+ fs.rename(srcdirs[i], dstdirs[i]));
+ }
+
+ for (Path yesdir : yesdirs) {
+ assertTrue("Path was supposed to exist: " + yesdir, fs.exists(yesdir));
+ }
+ for (Path nodir : nodirs) {
+ assertFalse("Path is not supposed to exist: " + nodir, fs.exists(nodir));
+ }
+ }
+
+ /**
+ * Tests that after renaming a directory, the original directory and its
+ * contents are indeed missing and the corresponding new paths are visible.
+ * @throws Exception
+ */
+ @Test
+ public void testConsistentListAfterRename() throws Exception {
+ Path[] mkdirs = {
+ path("d1/f"),
+ path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)
+ };
+ Path[] srcdirs = {path("d1")};
+ Path[] dstdirs = {path("d2")};
+ Path[] yesdirs = {path("d2"), path("d2/f"),
+ path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
+ Path[] nodirs = {path("d1"), path("d1/f"),
+ path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
+ doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
+ getFileSystem().delete(path("d1"), true);
+ getFileSystem().delete(path("d2"), true);
+ }
+
+ /**
+ * Tests a circular sequence of renames to verify that overwriting recently
+ * deleted files and reading recently created files from rename operations
+ * works as expected.
+ * @throws Exception
+ */
+ @Test
+ public void testRollingRenames() throws Exception {
+ Path[] dir0 = {path("rolling/1")};
+ Path[] dir1 = {path("rolling/2")};
+ Path[] dir2 = {path("rolling/3")};
+ // These sets have to be in reverse order compared to the movement: the
+ // higher-numbered directory is renamed away before the lower-numbered one
+ // moves into its place, so no rename lands on an existing destination.
+ Path[] setA = {dir1[0], dir0[0]};
+ Path[] setB = {dir2[0], dir1[0]};
+ Path[] setC = {dir0[0], dir2[0]};
+
+ for (int i = 0; i < 2; i++) {
+ Path[] firstSet = i == 0 ? setA : null;
+ doTestRenameSequence(firstSet, setA, setB, setB, dir0);
+ doTestRenameSequence(null, setB, setC, setC, dir1);
+ doTestRenameSequence(null, setC, setA, setA, dir2);
+ }
+
+ S3AFileSystem fs = getFileSystem();
+ assertFalse("Renaming deleted file should have failed",
+ fs.rename(dir2[0], dir1[0]));
+ assertTrue("Renaming over existing file should have succeeded",
+ fs.rename(dir1[0], dir0[0]));
+ }
+
+ /**
+ * Tests that deleted files immediately stop manifesting in list operations
+ * even when the effect in S3 is delayed.
+ * @throws Exception
+ */
+ @Test
+ public void testConsistentListAfterDelete() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+ // test will fail if NullMetadataStore (the default) is configured: skip it.
+ Assume.assumeTrue(fs.hasMetadataStore());
+
+ // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
+ // in listObjects() results via InconsistentS3Client
+ Path inconsistentPath =
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
+
+ Path[] testDirs = {path("a/b/dir1"),
+ path("a/b/dir2"),
+ inconsistentPath};
+
+ for (Path path : testDirs) {
+ assertTrue(fs.mkdirs(path));
+ }
+ clearInconsistency(fs);
+ for (Path path : testDirs) {
+ assertTrue(fs.delete(path, false));
+ }
+
+ FileStatus[] paths = fs.listStatus(path("a/b/"));
+ List<Path> list = new ArrayList<>();
+ for (FileStatus fileState : paths) {
+ list.add(fileState.getPath());
+ }
+ assertFalse(list.contains(path("a/b/dir1")));
+ assertFalse(list.contains(path("a/b/dir2")));
+ // This should fail without S3Guard, and succeed with it.
+ assertFalse(list.contains(inconsistentPath));
+ }
+
+ /**
+ * Tests that a rename performed immediately after deleting files in the
+ * source directory results in exactly the correct set of destination files
+ * and none of the source files.
+ * @throws Exception
+ */
+ @Test
+ public void testConsistentRenameAfterDelete() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+ // test will fail if NullMetadataStore (the default) is configured: skip it.
+ Assume.assumeTrue(fs.hasMetadataStore());
+
+ // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
+ // in listObjects() results via InconsistentS3Client
+ Path inconsistentPath =
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
+
+ Path[] testDirs = {path("a/b/dir1"),
+ path("a/b/dir2"),
+ inconsistentPath};
+
+ for (Path path : testDirs) {
+ assertTrue(fs.mkdirs(path));
+ }
+ clearInconsistency(fs);
+ assertTrue(fs.delete(testDirs[1], false));
+ assertTrue(fs.delete(testDirs[2], false));
+
+ fs.rename(path("a"), path("a3"));
+ FileStatus[] paths = fs.listStatus(path("a3/b"));
+ List<Path> list = new ArrayList<>();
+ for (FileStatus fileState : paths) {
+ list.add(fileState.getPath());
+ }
+ assertTrue(list.contains(path("a3/b/dir1")));
+ assertFalse(list.contains(path("a3/b/dir2")));
+ // This should fail without S3Guard, and succeed with it.
+ assertFalse(list.contains(path("a3/b/dir3-" +
+ DEFAULT_DELAY_KEY_SUBSTRING)));
+
+ try {
+ RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
+ path("a"), true);
+ fail("Recently renamed dir should not be visible");
+ } catch (FileNotFoundException e) {
+ // expected
+ }
+ }
+
+ @Test
+ public void testConsistentListStatusAfterPut() throws Exception {
+
+ S3AFileSystem fs = getFileSystem();
+
+ // This test will fail if NullMetadataStore (the default) is configured:
+ // skip it.
+ Assume.assumeTrue(fs.hasMetadataStore());
+
+ // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
+ // in listObjects() results via InconsistentS3Client
+ Path inconsistentPath =
+ path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
+
+ Path[] testDirs = {path("a/b/dir1"),
+ path("a/b/dir2"),
+ inconsistentPath};
+
+ for (Path path : testDirs) {
+ assertTrue(fs.mkdirs(path));
+ }
+
+ FileStatus[] paths = fs.listStatus(path("a/b/"));
+ List<Path> list = new ArrayList<>();
+ for (FileStatus fileState : paths) {
+ list.add(fileState.getPath());
+ }
+ assertTrue(list.contains(path("a/b/dir1")));
+ assertTrue(list.contains(path("a/b/dir2")));
+ // This should fail without S3Guard, and succeed with it.
+ assertTrue(list.contains(inconsistentPath));
+ }
+
+ /**
+ * Similar to {@link #testConsistentListStatusAfterPut()}, this tests that the
+ * FS listLocatedStatus() call will return a consistent listing.
+ */
+ @Test
+ public void testConsistentListLocatedStatusAfterPut() throws Exception {
+ final S3AFileSystem fs = getFileSystem();
+ // This test will fail if NullMetadataStore (the default) is configured:
+ // skip it.
+ Assume.assumeTrue(fs.hasMetadataStore());
+ String rootDir = "doTestConsistentListLocatedStatusAfterPut";
+ fs.mkdirs(path(rootDir));
+
+ final int[] numOfPaths = {0, 1, 5};
+ for (int normalPathNum : numOfPaths) {
+ for (int delayedPathNum : new int[] {0, 2}) {
+ LOG.info("Testing with normalPathNum={}, delayedPathNum={}",
+ normalPathNum, delayedPathNum);
+ doTestConsistentListLocatedStatusAfterPut(fs, rootDir, normalPathNum,
+ delayedPathNum);
+ }
+ }
+ }
+
+ /**
+ * Helper method to implement the tests of consistent listLocatedStatus().
+ * @param fs The S3 file system from the contract
+ * @param rootDir root directory under which the test paths are created
+ * @param normalPathNum number of paths listed directly from S3 without delay
+ * @param delayedPathNum number of paths listed with delayed visibility
+ * @throws Exception
+ */
+ private void doTestConsistentListLocatedStatusAfterPut(S3AFileSystem fs,
+ String rootDir, int normalPathNum, int delayedPathNum) throws Exception {
+ final List<Path> testDirs = new ArrayList<>(normalPathNum + delayedPathNum);
+ int index = 0;
+ for (; index < normalPathNum; index++) {
+ testDirs.add(path(rootDir + "/dir-" +
+ index));
+ }
+ for (; index < normalPathNum + delayedPathNum; index++) {
+ // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
+ // in listObjects() results via InconsistentS3Client
+ testDirs.add(path(rootDir + "/dir-" + index +
+ DEFAULT_DELAY_KEY_SUBSTRING));
+ }
+
+ for (Path path : testDirs) {
+ // delete the old test path (if any) so that when we call mkdirs() later,
+ // the directories to be delayed are tracked via putObject() requests.
+ fs.delete(path, true);
+ assertTrue(fs.mkdirs(path));
+ }
+
+ // this should return the union data from S3 and MetadataStore
+ final RemoteIterator<LocatedFileStatus> statusIterator =
+ fs.listLocatedStatus(path(rootDir + "/"));
+ List<Path> list = new ArrayList<>();
+ for (; statusIterator.hasNext();) {
+ list.add(statusIterator.next().getPath());
+ }
+
+ // This should fail without S3Guard, and succeed with it, because some of
+ // the children under the test path have delayed visibility.
+ for (Path path : testDirs) {
+ assertTrue("listLocatedStatus should list " + path, list.contains(path));
+ }
+ }
+
+ /**
+ * Tests that the S3AFS listFiles() call will return a consistent file list.
+ */
+ @Test
+ public void testConsistentListFiles() throws Exception {
+ final S3AFileSystem fs = getFileSystem();
+ // This test will fail if NullMetadataStore (the default) is configured:
+ // skip it.
+ Assume.assumeTrue(fs.hasMetadataStore());
+
+ final int[] numOfPaths = {0, 2};
+ for (int dirNum : numOfPaths) {
+ for (int normalFile : numOfPaths) {
+ for (int delayedFile : new int[] {0, 1}) {
+ for (boolean recursive : new boolean[] {true, false}) {
+ doTestListFiles(fs, dirNum, normalFile, delayedFile, recursive);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Helper method to implement the tests of consistent listFiles().
+ *
+ * The file structure has dirNum subdirectories, and each directory (including
+ * the test base directory itself) has normalFileNum normal files and
+ * delayedFileNum delayed files.
+ *
+ * @param fs The S3 file system from contract
+ * @param dirNum number of subdirectories
+ * @param normalFileNum number of files in each directory listed without delay
+ * @param delayedFileNum number of files in each directory listed with delayed visibility
+ * @param recursive listFiles recursively if true
+ * @throws Exception if any unexpected error
+ */
+ private void doTestListFiles(S3AFileSystem fs, int dirNum, int normalFileNum,
+ int delayedFileNum, boolean recursive) throws Exception {
+ describe("Testing dirNum=%d, normalFile=%d, delayedFile=%d, "
+ + "recursive=%s", dirNum, normalFileNum, delayedFileNum, recursive);
+ final Path baseTestDir = path("doTestListFiles-" + dirNum + "-"
+ + normalFileNum + "-" + delayedFileNum + "-" + recursive);
+ // delete the old test path (if any) so that when we call mkdirs() later,
+ // the subdirectories to be delayed are tracked via putObject() requests.
+ fs.delete(baseTestDir, true);
+
+ // make subdirectories (if any)
+ final List<Path> testDirs = new ArrayList<>(dirNum + 1);
+ assertTrue(fs.mkdirs(baseTestDir));
+ testDirs.add(baseTestDir);
+ for (int i = 0; i < dirNum; i++) {
+ final Path subdir = path(baseTestDir + "/dir-" + i);
+ assertTrue(fs.mkdirs(subdir));
+ testDirs.add(subdir);
+ }
+
+ final Collection<String> fileNames
+ = new ArrayList<>(normalFileNum + delayedFileNum);
+ int index = 0;
+ for (; index < normalFileNum; index++) {
+ fileNames.add("file-" + index);
+ }
+ for (; index < normalFileNum + delayedFileNum; index++) {
+ // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
+ // in listObjects() results via InconsistentS3Client
+ fileNames.add("file-" + index + "-" + DEFAULT_DELAY_KEY_SUBSTRING);
+ }
+
+ int filesAndEmptyDirectories = 0;
+
+ // create files under each test directory
+ for (Path dir : testDirs) {
+ for (String fileName : fileNames) {
+ writeTextFile(fs, new Path(dir, fileName), "I, " + fileName, false);
+ filesAndEmptyDirectories++;
+ }
+ }
+
+ // this should return the union data from S3 and MetadataStore
+ final RemoteIterator<LocatedFileStatus> statusIterator
+ = fs.listFiles(baseTestDir, recursive);
+ final Collection<Path> listedFiles = new HashSet<>();
+ for (; statusIterator.hasNext();) {
+ final FileStatus status = statusIterator.next();
+ assertTrue("FileStatus " + status + " is not a file!", status.isFile());
+ listedFiles.add(status.getPath());
+ }
+ LOG.info("S3AFileSystem::listFiles('{}', {}) -> {}",
+ baseTestDir, recursive, listedFiles);
+
+ // This should fail without S3Guard, and succeed with it, because some of
+ // the files to list have delayed visibility.
+ if (!recursive) {
+ // in this case only the top level files are listed
+ assertEquals("Unexpected number of files returned by listFiles() call",
+ normalFileNum + delayedFileNum, listedFiles.size());
+ verifyFileIsListed(listedFiles, baseTestDir, fileNames);
+ } else {
+ assertEquals("Unexpected number of files returned by listFiles() call",
+ filesAndEmptyDirectories,
+ listedFiles.size());
+ for (Path dir : testDirs) {
+ verifyFileIsListed(listedFiles, dir, fileNames);
+ }
+ }
+ }
+
+ private static void verifyFileIsListed(Collection<Path> listedFiles,
+ Path currentDir, Collection<String> fileNames) {
+ for (String fileName : fileNames) {
+ final Path file = new Path(currentDir, fileName);
+ assertTrue(file + " should have been listed", listedFiles.contains(file));
+ }
+ }
+
+ @Test
+ public void testCommitByRenameOperations() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ Assume.assumeTrue(fs.hasMetadataStore());
+ Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
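+ // the work directory name contains the delay marker, so raw S3 listings
+ // under it are inconsistent; the rename-based commit below must still
+ // observe a consistent view through the metadata store.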
+ Path task00 = new Path(work, "task00");
+ fs.mkdirs(task00);
+ String name = "part-00";
+ try (FSDataOutputStream out =
+ fs.create(new Path(task00, name), false)) {
+ out.writeChars("hello");
+ }
+ for (FileStatus stat : fs.listStatus(task00)) {
+ fs.rename(stat.getPath(), work);
+ }
+ List<FileStatus> files = new ArrayList<>(2);
+ for (FileStatus stat : fs.listStatus(work)) {
+ if (stat.isFile()) {
+ files.add(stat);
+ }
+ }
+ assertFalse("renamed file " + name + " not found in " + work,
+ files.isEmpty());
+ assertEquals("more files found than expected in " + work
+ + " " + ls(work), 1, files.size());
+ FileStatus status = files.get(0);
+ assertEquals("Wrong filename in " + status,
+ name, status.getPath().getName());
+ }
+
+ @Test
+ public void testInconsistentS3ClientDeletes() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING);
+ for (int i = 0; i < 3; i++) {
+ fs.mkdirs(new Path(root, "dir" + i));
+ touch(fs, new Path(root, "file" + i));
+ for (int j = 0; j < 3; j++) {
+ touch(fs, new Path(new Path(root, "dir" + i), "file" + i + "-" + j));
+ }
+ }
+ clearInconsistency(fs);
+
+ AmazonS3 client = fs.getAmazonS3Client();
+ String key = fs.pathToKey(root) + "/";
+
+ ObjectListing preDeleteDelimited = client.listObjects(
+ fs.createListObjectsRequest(key, "/"));
+ ObjectListing preDeleteUndelimited = client.listObjects(
+ fs.createListObjectsRequest(key, null));
+
+ fs.delete(root, true);
+
+ ObjectListing postDeleteDelimited = client.listObjects(
+ fs.createListObjectsRequest(key, "/"));
+ ObjectListing postDeleteUndelimited = client.listObjects(
+ fs.createListObjectsRequest(key, null));
+
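+ // Deletes of keys under the delayed path also have delayed visibility, so
+ // the raw listings taken immediately after the delete are expected to
+ // match the pre-delete listings exactly.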
+ assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
+ "in a non-recursive listing",
+ preDeleteDelimited.getObjectSummaries().size(),
+ postDeleteDelimited.getObjectSummaries().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
+ "in a non-recursive listing",
+ preDeleteDelimited.getCommonPrefixes().size(),
+ postDeleteDelimited.getCommonPrefixes().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
+ "in a recursive listing",
+ preDeleteUndelimited.getObjectSummaries().size(),
+ postDeleteUndelimited.getObjectSummaries().size()
+ );
+ assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
+ "in a recursive listing",
+ preDeleteUndelimited.getCommonPrefixes().size(),
+ postDeleteUndelimited.getCommonPrefixes().size()
+ );
+ }
+
+ private static void clearInconsistency(S3AFileSystem fs) throws Exception {
+ AmazonS3 s3 = fs.getAmazonS3Client();
+ InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3);
+ ic.clearInconsistency();
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java
new file mode 100644
index 00000000000..a63b696b7f8
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+
+/**
+ * Test cases that validate S3Guard's behavior for writing things like
+ * directory listings back to the MetadataStore.
+ */
+public class ITestS3GuardWriteBack extends AbstractS3ATestBase {
+
+ /**
+ * In listStatus(), when S3Guard is enabled, the full listing for a
+ * directory is "written back" to the MetadataStore before the listing is
+ * returned. Currently this "write back" behavior occurs when
+ * fs.s3a.metadatastore.authoritative is true. This test validates this
+ * behavior.
+ * @throws Exception on failure
+ */
+ @Test
+ public void testListStatusWriteBack() throws Exception {
+ Assume.assumeTrue(getFileSystem().hasMetadataStore());
+
+ Path directory = path("ListStatusWriteBack");
+
+ // "raw" S3AFileSystem without S3Guard
+ S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false);
+
+ // Another with S3Guard and write-back disabled
+ S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false);
+
+ // Another S3Guard and write-back enabled
+ S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true);
+
+ // delete the existing directory (in case of last test failure)
+ noS3Guard.delete(directory, true);
+ // Create a directory on S3 only
+ noS3Guard.mkdirs(new Path(directory, "OnS3"));
+ // Create a directory on both S3 and metadata store
+ Path p = new Path(directory, "OnS3AndMS");
+ assertPathDoesntExist(noWriteBack, p);
+ noWriteBack.mkdirs(p);
+
+ FileStatus[] fsResults;
+ DirListingMetadata mdResults;
+
+ // FS should return both even though S3Guard is not writing back to MS
+ fsResults = noWriteBack.listStatus(directory);
+ assertEquals("Filesystem enabled S3Guard without write back should have "
+ + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
+ 2, fsResults.length);
+
+ // Metadata store without write-back should still only contain /OnS3AndMS,
+ // because newly discovered /OnS3 is not written back to metadata store
+ mdResults = noWriteBack.getMetadataStore().listChildren(directory);
+ assertEquals("Metadata store without write back should still only know "
+ + "about /OnS3AndMS, but it has: " + mdResults,
+ 1, mdResults.numEntries());
+
+ // FS should return both (and will write it back)
+ fsResults = yesWriteBack.listStatus(directory);
+ assertEquals("Filesystem enabled S3Guard with write back should have "
+ + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
+ 2, fsResults.length);
+
+ // Metadata store with write-back should contain both because the newly
+ // discovered /OnS3 should have been written back to metadata store
+ mdResults = yesWriteBack.getMetadataStore().listChildren(directory);
+ assertEquals("Unexpected number of results from metadata store. "
+ + "Should have /OnS3 and /OnS3AndMS: " + mdResults,
+ 2, mdResults.numEntries());
+
+ // If we don't clean this up, the next test run will fail because it will
+ // have recorded /OnS3 being deleted even after it's written to noS3Guard.
+ getFileSystem().getMetadataStore().forgetMetadata(
+ new Path(directory, "OnS3"));
+ }
+
+ /** Create a separate S3AFileSystem instance for testing. */
+ private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard,
+ boolean authoritativeMeta) throws IOException {
+ Configuration conf;
+
+ // Create a FileSystem that is S3-backed only
+ conf = createConfiguration();
+ S3ATestUtils.disableFilesystemCaching(conf);
+ if (disableS3Guard) {
+ conf.set(Constants.S3_METADATA_STORE_IMPL,
+ Constants.S3GUARD_METASTORE_NULL);
+ } else {
+ S3ATestUtils.maybeEnableS3Guard(conf);
+ conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta);
+ }
+ FileSystem fs = FileSystem.get(fsURI, conf);
+ return asS3AFS(fs);
+ }
+
+ private static S3AFileSystem asS3AFS(FileSystem fs) {
+ assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem);
+ return (S3AFileSystem)fs;
+ }
+
+ private static void assertPathDoesntExist(FileSystem fs, Path p)
+ throws IOException {
+ try {
+ FileStatus s = fs.getFileStatus(p);
+ } catch (FileNotFoundException e) {
+ return;
+ }
+ fail("Path should not exist: " + p);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java
index 9e0a5e42b62..4e2538062ce 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java
@@ -23,6 +23,7 @@ import static org.mockito.Mockito.*;
import java.net.URI;
import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.Region;
/**
* An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3}
@@ -35,6 +36,8 @@ public class MockS3ClientFactory implements S3ClientFactory {
String bucket = name.getHost();
AmazonS3 s3 = mock(AmazonS3.class);
when(s3.doesBucketExist(bucket)).thenReturn(true);
+ when(s3.getBucketLocation(anyString()))
+ .thenReturn(Region.US_West.toString());
return s3;
}
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
index acbe6103995..2c4f0094004 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
@@ -134,6 +134,18 @@ public interface S3ATestConstants {
String TEST_STS_ENABLED = "test.fs.s3a.sts.enabled";
String TEST_STS_ENDPOINT = "test.fs.s3a.sts.endpoint";
+ /**
+ * Properties for enabling and configuring S3Guard in the tests.
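+ *
+ * For example, a test run can enable S3Guard with the local metadata store
+ * by setting (property names as defined below; how they are supplied
+ * depends on the test configuration mechanism in use):
+ * <pre>
+ * fs.s3a.s3guard.test.enabled = true
+ * fs.s3a.s3guard.test.implementation = local
+ * </pre>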
+ */
+ String TEST_S3GUARD_PREFIX = "fs.s3a.s3guard.test";
+ String TEST_S3GUARD_ENABLED = TEST_S3GUARD_PREFIX + ".enabled";
+ String TEST_S3GUARD_AUTHORITATIVE = TEST_S3GUARD_PREFIX + ".authoritative";
+ String TEST_S3GUARD_IMPLEMENTATION = TEST_S3GUARD_PREFIX + ".implementation";
+ String TEST_S3GUARD_IMPLEMENTATION_LOCAL = "local";
+ String TEST_S3GUARD_IMPLEMENTATION_DYNAMO = "dynamo";
+ String TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL = "dynamodblocal";
+ String TEST_S3GUARD_IMPLEMENTATION_NONE = "none";
+
/**
* Timeout in Milliseconds for standard tests: {@value}.
*/
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index 95289674dc9..8dbf90af751 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -22,7 +22,14 @@ import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory;
+import org.apache.hadoop.fs.s3a.s3guard.DynamoDBLocalClientFactory;
+import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
+
+import org.hamcrest.core.Is;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.internal.AssumptionViolatedException;
@@ -31,11 +38,13 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
+import java.net.URISyntaxException;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
import static org.junit.Assert.*;
/**
@@ -51,6 +60,15 @@ public final class S3ATestUtils {
*/
public static final String UNSET_PROPERTY = "unset";
+ /**
+ * Get S3A FS name.
+ * @param conf configuration.
+ * @return S3A fs name.
+ */
+ public static String getFsName(Configuration conf) {
+ return conf.getTrimmed(TEST_FS_S3A_NAME, "");
+ }
+
/**
* Create the test filesystem.
*
@@ -97,6 +115,8 @@ public final class S3ATestUtils {
throw new AssumptionViolatedException(
"No test filesystem in " + TEST_FS_S3A_NAME);
}
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
S3AFileSystem fs1 = new S3AFileSystem();
//enable purging in tests
if (purge) {
@@ -137,6 +157,8 @@ public final class S3ATestUtils {
throw new AssumptionViolatedException("No test filesystem in "
+ TEST_FS_S3A_NAME);
}
+ // patch in S3Guard options
+ maybeEnableS3Guard(conf);
FileContext fc = FileContext.getFileContext(testURI, conf);
return fc;
}
@@ -301,12 +323,95 @@ public final class S3ATestUtils {
* @return a path
*/
public static Path createTestPath(Path defVal) {
- String testUniqueForkId = System.getProperty(
- S3ATestConstants.TEST_UNIQUE_FORK_ID);
+ String testUniqueForkId =
+ System.getProperty(S3ATestConstants.TEST_UNIQUE_FORK_ID);
return testUniqueForkId == null ? defVal :
new Path("/" + testUniqueForkId, "test");
}
+ /**
+ * Test assumption that S3Guard is/is not enabled.
+ * @param shouldBeEnabled should S3Guard be enabled?
+ * @param originalConf configuration to check
+ * @throws URISyntaxException
+ */
+ public static void assumeS3GuardState(boolean shouldBeEnabled,
+ Configuration originalConf) throws URISyntaxException {
+ boolean isEnabled = getTestPropertyBool(originalConf, TEST_S3GUARD_ENABLED,
+ originalConf.getBoolean(TEST_S3GUARD_ENABLED, false));
+ Assume.assumeThat("Unexpected S3Guard test state:"
+ + " shouldBeEnabled=" + shouldBeEnabled
+ + " and isEnabled=" + isEnabled,
+ shouldBeEnabled, Is.is(isEnabled));
+
+ final String fsname = originalConf.getTrimmed(TEST_FS_S3A_NAME);
+ Assume.assumeNotNull(fsname);
+ final String bucket = new URI(fsname).getHost();
+ final Configuration conf = propagateBucketOptions(originalConf, bucket);
+ boolean usingNullImpl = S3GUARD_METASTORE_NULL.equals(
+ conf.getTrimmed(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL));
+ Assume.assumeThat("Unexpected S3Guard test state:"
+ + " shouldBeEnabled=" + shouldBeEnabled
+ + " but usingNullImpl=" + usingNullImpl,
+ shouldBeEnabled, Is.is(!usingNullImpl));
+ }
+
+ /**
+ * Conditionally set the S3Guard options from test properties.
+ * @param conf configuration
+ */
+ public static void maybeEnableS3Guard(Configuration conf) {
+ if (getTestPropertyBool(conf, TEST_S3GUARD_ENABLED,
+ conf.getBoolean(TEST_S3GUARD_ENABLED, false))) {
+ // S3Guard is enabled.
+ boolean authoritative = getTestPropertyBool(conf,
+ TEST_S3GUARD_AUTHORITATIVE,
+ conf.getBoolean(TEST_S3GUARD_AUTHORITATIVE, true));
+ String impl = getTestProperty(conf, TEST_S3GUARD_IMPLEMENTATION,
+ conf.get(TEST_S3GUARD_IMPLEMENTATION,
+ TEST_S3GUARD_IMPLEMENTATION_LOCAL));
+ String implClass = "";
+ switch (impl) {
+ case TEST_S3GUARD_IMPLEMENTATION_LOCAL:
+ implClass = S3GUARD_METASTORE_LOCAL;
+ break;
+ case TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL:
+ conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
+ DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
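+ // fall through: DynamoDBLocal still uses the Dynamo metastore
+ // implementation class, it only swaps in a local DynamoDB client factory.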
+ case TEST_S3GUARD_IMPLEMENTATION_DYNAMO:
+ implClass = S3GUARD_METASTORE_DYNAMO;
+ break;
+ case TEST_S3GUARD_IMPLEMENTATION_NONE:
+ implClass = S3GUARD_METASTORE_NULL;
+ break;
+ default:
+ fail("Unknown s3guard back end: \"" + impl + "\"");
+ }
+ LOG.debug("Enabling S3Guard, authoritative={}, implementation={}",
+ authoritative, implClass);
+ conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritative);
+ conf.set(S3_METADATA_STORE_IMPL, implClass);
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ }
+ }
+
+ /**
+ * Is there a MetadataStore configured for s3a with authoritative enabled?
+ * @param conf Configuration to test.
+ * @return true iff there is a MetadataStore configured, and it is
+ * configured to allow authoritative results. This can reduce round trips
+ * to the S3 service for cached results, which may affect FS/FC
+ * statistics.
+ */
+ public static boolean isMetadataStoreAuthoritative(Configuration conf) {
+ if (conf == null) {
+ return Constants.DEFAULT_METADATASTORE_AUTHORITATIVE;
+ }
+ return conf.getBoolean(
+ Constants.METADATASTORE_AUTHORITATIVE,
+ Constants.DEFAULT_METADATASTORE_AUTHORITATIVE);
+ }
+
/**
* Reset all metrics in a list.
* @param metrics metrics to reset
@@ -503,6 +608,94 @@ public final class S3ATestUtils {
private S3ATestUtils() {
}
+ /**
+ * Verify the core size, block size and timestamp values of a file.
+ * @param status status entry to check
+ * @param size file size
+ * @param blockSize block size
+ * @param modTime modified time
+ */
+ public static void verifyFileStatus(FileStatus status, long size,
+ long blockSize, long modTime) {
+ verifyFileStatus(status, size, 0, modTime, 0, blockSize, null, null, null);
+ }
+
+ /**
+ * Verify the status entry of a file matches that expected.
+ * @param status status entry to check
+ * @param size file size
+ * @param replication replication factor (may be 0)
+ * @param modTime modified time
+ * @param accessTime access time (may be 0)
+ * @param blockSize block size
+ * @param owner owner (may be null)
+ * @param group user group (may be null)
+ * @param permission permission (may be null)
+ */
+ public static void verifyFileStatus(FileStatus status,
+ long size,
+ int replication,
+ long modTime,
+ long accessTime,
+ long blockSize,
+ String owner,
+ String group,
+ FsPermission permission) {
+ String details = status.toString();
+ assertFalse("Not a dir: " + details, status.isDirectory());
+ assertEquals("Mod time: " + details, modTime, status.getModificationTime());
+ assertEquals("File size: " + details, size, status.getLen());
+ assertEquals("Block size: " + details, blockSize, status.getBlockSize());
+ if (replication > 0) {
+ assertEquals("Replication value: " + details, replication,
+ status.getReplication());
+ }
+ if (accessTime != 0) {
+ assertEquals("Access time: " + details, accessTime,
+ status.getAccessTime());
+ }
+ if (owner != null) {
+ assertEquals("Owner: " + details, owner, status.getOwner());
+ }
+ if (group != null) {
+ assertEquals("Group: " + details, group, status.getGroup());
+ }
+ if (permission != null) {
+ assertEquals("Permission: " + details, permission,
+ status.getPermission());
+ }
+ }
+
+ /**
+ * Verify the status entry of a directory matches that expected.
+ * @param status status entry to check
+ * @param replication replication factor
+ * @param modTime modified time
+ * @param accessTime access time
+ * @param owner owner
+ * @param group user group
+ * @param permission permission.
+ */
+ public static void verifyDirStatus(FileStatus status,
+ int replication,
+ long modTime,
+ long accessTime,
+ String owner,
+ String group,
+ FsPermission permission) {
+ String details = status.toString();
+ assertTrue("Is a dir: " + details, status.isDirectory());
+ assertEquals("zero length: " + details, 0, status.getLen());
+
+ assertEquals("Mod time: " + details, modTime, status.getModificationTime());
+ assertEquals("Replication value: " + details, replication,
+ status.getReplication());
+ assertEquals("Access time: " + details, accessTime, status.getAccessTime());
+ assertEquals("Owner: " + details, owner, status.getOwner());
+ assertEquals("Group: " + details, group, status.getGroup());
+ assertEquals("Permission: " + details, permission, status.getPermission());
+ }
+
/**
* Set a bucket specific property to a particular value.
* If the generic key passed in has an {@code fs.s3a. prefix},
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
new file mode 100644
index 00000000000..e647327728f
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import static org.apache.hadoop.fs.s3a.Listing.ACCEPT_ALL;
+import static org.apache.hadoop.fs.s3a.Listing.ProvidedFileStatusIterator;
+
+/**
+ * Tests for the {@link Listing} helper classes used by S3A listing operations.
+ */
+public class TestListing extends AbstractS3AMockTest {
+
+ private static class MockRemoteIterator implements
+ RemoteIterator<FileStatus> {
+ private Iterator<FileStatus> iterator;
+
+ MockRemoteIterator(Collection<FileStatus> source) {
+ iterator = source.iterator();
+ }
+
+ public boolean hasNext() {
+ return iterator.hasNext();
+ }
+
+ public FileStatus next() {
+ return iterator.next();
+ }
+ }
+
+ private FileStatus blankFileStatus(Path path) {
+ return new FileStatus(0, true, 0, 0, 0, path);
+ }
+
+ @Test
+ public void testTombstoneReconcilingIterator() throws Exception {
+ Path parent = new Path("/parent");
+ Path liveChild = new Path(parent, "/liveChild");
+ Path deletedChild = new Path(parent, "/deletedChild");
+ Path[] allFiles = {parent, liveChild, deletedChild};
+ Path[] liveFiles = {parent, liveChild};
+
+ Listing listing = new Listing(fs);
+ Collection<FileStatus> statuses = new ArrayList<>();
+ statuses.add(blankFileStatus(parent));
+ statuses.add(blankFileStatus(liveChild));
+ statuses.add(blankFileStatus(deletedChild));
+
+ Set<Path> tombstones = new HashSet<>();
+ tombstones.add(deletedChild);
+
+ RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
+ statuses);
+ RemoteIterator<LocatedFileStatus> locatedIterator =
+ listing.createLocatedFileStatusIterator(sourceIterator);
+ RemoteIterator<LocatedFileStatus> reconcilingIterator =
+ listing.createTombstoneReconcilingIterator(locatedIterator, tombstones);
+
+ Set<Path> expectedPaths = new HashSet<>();
+ expectedPaths.add(parent);
+ expectedPaths.add(liveChild);
+
+ Set<Path> actualPaths = new HashSet<>();
+ while (reconcilingIterator.hasNext()) {
+ actualPaths.add(reconcilingIterator.next().getPath());
+ }
+ Assert.assertEquals(expectedPaths, actualPaths);
+ }
+
+ @Test
+ public void testProvidedFileStatusIteratorEnd() throws Exception {
+ FileStatus[] statuses = {
+ new FileStatus(100, false, 1, 8192, 0, new Path("s3a://blah/blah"))
+ };
+ ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses,
+ ACCEPT_ALL, new Listing.AcceptAllButS3nDirs());
+
+ Assert.assertTrue("hasNext() should return true first time", it.hasNext());
+ Assert.assertNotNull("first element should not be null", it.next());
+ Assert.assertFalse("hasNext() should now be false", it.hasNext());
+ try {
+ it.next();
+ Assert.fail("next() should have thrown exception");
+ } catch (NoSuchElementException e) {
+ // Correct behavior. Any other exceptions are propagated as failure.
+ return;
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java
index e1aef757866..e493818ffb8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java
@@ -39,7 +39,9 @@ public class ITestS3AFileContextStatistics extends FCStatisticsBaseTest {
@After
public void tearDown() throws Exception {
- fc.delete(fileContextTestHelper.getTestRootPath(fc, "test"), true);
+ if (fc != null) {
+ fc.delete(fileContextTestHelper.getTestRootPath(fc, "test"), true);
+ }
}
@Override
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java
index fff1fcb9f58..725646ce1bb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java
@@ -16,19 +16,29 @@ package org.apache.hadoop.fs.s3a.fileContext;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextURIBase;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestFileSystem;
+
/**
* S3a implementation of FileContextURIBase.
*/
public class ITestS3AFileContextURI extends FileContextURIBase {
+ private Configuration conf;
+ private boolean hasMetadataStore;
+
@Before
public void setUp() throws IOException, Exception {
- Configuration conf = new Configuration();
+ conf = new Configuration();
+ try (S3AFileSystem s3aFS = createTestFileSystem(conf)) {
+ hasMetadataStore = s3aFS.hasMetadataStore();
+ }
fc1 = S3ATestUtils.createTestFileContext(conf);
fc2 = S3ATestUtils.createTestFileContext(conf); //different object, same FS
super.setUp();
@@ -41,4 +51,11 @@ public class ITestS3AFileContextURI extends FileContextURIBase {
// (the statistics tested with this method are not relevant for an S3FS)
}
+ @Test
+ @Override
+ public void testModificationTime() throws IOException {
+ // skip modtime tests as there may be some inconsistency during creation
+ assume("modification time tests are skipped", !hasMetadataStore);
+ super.testModificationTime();
+ }
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractMSContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractMSContract.java
new file mode 100644
index 00000000000..921d4a686e0
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractMSContract.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import org.apache.hadoop.fs.FileSystem;
+
+import java.io.IOException;
+
+/**
+ * Test specification for MetadataStore contract tests. Supplies configuration
+ * and MetadataStore instance.
+ */
+public abstract class AbstractMSContract {
+
+ public abstract FileSystem getFileSystem() throws IOException;
+ public abstract MetadataStore getMetadataStore() throws IOException;
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
new file mode 100644
index 00000000000..ceacdf382b1
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
+
+/**
+ * Common functionality for S3GuardTool test cases.
+ */
+public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
+
+ protected static final String OWNER = "hdfs";
+
+ private MetadataStore ms;
+
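+ /**
+ * Run an S3GuardTool command and assert that it returns the given exit code.
+ * @param expected expected return code
+ * @param message assertion message on mismatch
+ * @param tool tool instance to run
+ * @param args arguments passed to the tool's run() method
+ * @throws Exception on any failure
+ */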
+ protected static void expectResult(int expected,
+ String message,
+ S3GuardTool tool,
+ String... args) throws Exception {
+ assertEquals(message, expected, tool.run(args));
+ }
+
+ protected static void expectSuccess(
+ String message,
+ S3GuardTool tool,
+ String... args) throws Exception {
+ assertEquals(message, SUCCESS, tool.run(args));
+ }
+
+ protected MetadataStore getMetadataStore() {
+ return ms;
+ }
+
+ protected abstract MetadataStore newMetadataStore();
+
+ @Override
+ public void setup() throws Exception {
+ super.setup();
+ S3ATestUtils.assumeS3GuardState(true, getConfiguration());
+ ms = newMetadataStore();
+ ms.initialize(getFileSystem());
+ }
+
+ @Override
+ public void teardown() throws Exception {
+ super.teardown();
+ IOUtils.cleanupWithLogger(LOG, ms);
+ }
+
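+ /**
+ * Create a directory on S3, in the metadata store, or both.
+ * @param path directory path
+ * @param onS3 set to true to create the directory on S3
+ * @param onMetadataStore set to true to record the directory in the
+ * metadata store
+ * @throws IOException IO problem
+ */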
+ protected void mkdirs(Path path, boolean onS3, boolean onMetadataStore)
+ throws IOException {
+ if (onS3) {
+ getFileSystem().mkdirs(path);
+ }
+ if (onMetadataStore) {
+ S3AFileStatus status = new S3AFileStatus(true, path, OWNER);
+ ms.put(new PathMetadata(status));
+ }
+ }
+
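+ /**
+ * Put a file entry into the metadata store, together with entries for
+ * all of its ancestor directories.
+ * @param ms metadata store to update
+ * @param f status of the file to record
+ * @throws IOException IO problem
+ */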
+ protected static void putFile(MetadataStore ms, S3AFileStatus f)
+ throws IOException {
+ assertNotNull(f);
+ ms.put(new PathMetadata(f));
+ Path parent = f.getPath().getParent();
+ while (parent != null) {
+ S3AFileStatus dir = new S3AFileStatus(false, parent, f.getOwner());
+ ms.put(new PathMetadata(dir));
+ parent = parent.getParent();
+ }
+ }
+
+ /**
+ * Create file either on S3 or in metadata store.
+ * @param path the file path.
+ * @param onS3 set to true to create the file on S3.
+ * @param onMetadataStore set to true to create the file on the
+ * metadata store.
+ * @throws IOException IO problem
+ */
+ protected void createFile(Path path, boolean onS3, boolean onMetadataStore)
+ throws IOException {
+ if (onS3) {
+ ContractTestUtils.touch(getFileSystem(), path);
+ }
+
+ if (onMetadataStore) {
+ S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(),
+ getFileSystem().qualify(path), 512L, "hdfs");
+ putFile(ms, status);
+ }
+ }
+
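+ /**
+ * Run the prune command against a directory holding one stale and one
+ * fresh file, then verify that only the fresh entry is left in the
+ * metadata store.
+ * @param cmdConf configuration used to construct the Prune command
+ * @param args command line arguments for the prune invocation
+ * @throws Exception on any failure
+ */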
+ private void testPruneCommand(Configuration cmdConf, String...args)
+ throws Exception {
+ Path parent = path("prune-cli");
+ try {
+ getFileSystem().mkdirs(parent);
+
+ S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
+ cmd.setMetadataStore(ms);
+
+ createFile(new Path(parent, "stale"), true, true);
+ Thread.sleep(TimeUnit.SECONDS.toMillis(2));
+ createFile(new Path(parent, "fresh"), true, true);
+
+ assertEquals(2, ms.listChildren(parent).getListing().size());
+ expectSuccess("Prune command did not exit successfully - see output", cmd,
+ args);
+ assertEquals(1, ms.listChildren(parent).getListing().size());
+ } finally {
+ getFileSystem().delete(parent, true);
+ ms.prune(Long.MAX_VALUE);
+ }
+ }
+
+ @Test
+ public void testPruneCommandCLI() throws Exception {
+ String testPath = path("testPruneCommandCLI").toString();
+ testPruneCommand(getFileSystem().getConf(),
+ "prune", "-seconds", "1", testPath);
+ }
+
+ @Test
+ public void testPruneCommandConf() throws Exception {
+ getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
+ TimeUnit.SECONDS.toMillis(1));
+ String testPath = path("testPruneCommandConf").toString();
+ testPruneCommand(getConfiguration(), "prune", testPath);
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBLocalClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBLocalClientFactory.java
new file mode 100644
index 00000000000..0291acdb006
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBLocalClientFactory.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.File;
+import java.io.IOException;
+
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
+import com.amazonaws.services.dynamodbv2.local.main.ServerRunner;
+import com.amazonaws.services.dynamodbv2.local.server.DynamoDBProxyServer;
+import org.apache.commons.lang3.StringUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory;
+import org.apache.hadoop.net.ServerSocketUtil;
+
+import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory.DefaultDynamoDBClientFactory.getRegion;
+
+/**
+ * A DynamoDBClientFactory implementation that creates AmazonDynamoDB clients
+ * against an in-memory DynamoDBLocal server instance.
+ *
+ * No AWS charges are incurred for the DynamoDB requests it serves. However,
+ * DynamoDBLocal is only a simulator of the DynamoDB web service, so its
+ * behavior may lag behind or differ from the real service; for example,
+ * throttling is not yet supported. It is intended for testing purposes only.
+ *
+ * To use this for creating a DynamoDB client in tests:
+ * <ol>
+ * <li>
+ * Like other DynamoDBClientFactory implementations, this factory should be
+ * selected through configuration.
+ * </li>
+ * <li>
+ * The singleton DynamoDBLocal server instance is started automatically when
+ * the AmazonDynamoDB client is created for the first time. It is still
+ * worth launching the server before all the tests so that errors fail fast.
+ * </li>
+ * <li>
+ * The server can be stopped explicitly, though tests do not need to do so
+ * as JVM termination will take care of it.
+ * </li>
+ * </ol>
+ *
+ * @see DefaultDynamoDBClientFactory
+ */
+public class DynamoDBLocalClientFactory extends Configured
+ implements DynamoDBClientFactory {
+
+ /** The singleton DynamoDBLocal server instance used for testing. */
+ private static DynamoDBProxyServer dynamoDBLocalServer;
+ private static String ddbEndpoint;
+
+ private static final String SYSPROP_SQLITE_LIB = "sqlite4java.library.path";
+
+ @Override
+ public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
+ throws IOException {
+ startSingletonServer();
+
+ final Configuration conf = getConf();
+ final AWSCredentialsProvider credentials =
+ createAWSCredentialProviderSet(null, conf);
+ final ClientConfiguration awsConf =
+ DefaultS3ClientFactory.createAwsConf(conf);
+ // fail fast in case of service errors
+ awsConf.setMaxErrorRetry(3);
+
+ final String region = getRegion(conf, defaultRegion);
+ LOG.info("Creating DynamoDBLocal client using endpoint {} in region {}",
+ ddbEndpoint, region);
+
+ return AmazonDynamoDBClientBuilder.standard()
+ .withCredentials(credentials)
+ .withClientConfiguration(awsConf)
+ .withEndpointConfiguration(
+ new AwsClientBuilder.EndpointConfiguration(ddbEndpoint, region))
+ .build();
+ }
+
+ /**
+ * Start a singleton in-memory DynamoDBLocal server if not started yet.
+ * @throws IOException if any error occurs
+ */
+ public synchronized static void startSingletonServer() throws IOException {
+ if (dynamoDBLocalServer != null) {
+ return;
+ }
+
+ // Set this property if it has not been set elsewhere
+ if (StringUtils.isEmpty(System.getProperty(SYSPROP_SQLITE_LIB))) {
+ String projectBuildDir = System.getProperty("project.build.directory");
+ if (StringUtils.isEmpty(projectBuildDir)) {
+ projectBuildDir = "target";
+ }
+ // sqlite4java lib should have been copied to $projectBuildDir/native-libs
+ System.setProperty(SYSPROP_SQLITE_LIB,
+ projectBuildDir + File.separator + "native-libs");
+ LOG.info("Setting {} -> {}",
+ SYSPROP_SQLITE_LIB, System.getProperty(SYSPROP_SQLITE_LIB));
+ }
+
+ try {
+ // Start an in-memory local DynamoDB instance
+ final String port = String.valueOf(ServerSocketUtil.getPort(0, 100));
+ ddbEndpoint = "http://localhost:" + port;
+ dynamoDBLocalServer = ServerRunner.createServerFromCommandLineArgs(
+ new String[]{"-inMemory", "-port", port});
+ dynamoDBLocalServer.start();
+ LOG.info("DynamoDBLocal singleton server was started at {}", ddbEndpoint);
+ } catch (Exception t) {
+ String msg = "Error starting DynamoDBLocal server at " + ddbEndpoint
+ + " " + t;
+ LOG.error(msg, t);
+ throw new IOException(msg, t);
+ }
+ }
+
+ /**
+ * Stop the in-memory DynamoDBLocal server if it is started.
+ * @throws IOException if any error occurs
+ */
+ public synchronized static void stopSingletonServer() throws IOException {
+ if (dynamoDBLocalServer != null) {
+ LOG.info("Shutting down the in-memory DynamoDBLocal server");
+ try {
+ dynamoDBLocalServer.stop();
+ } catch (Throwable t) {
+ String msg = "Error stopping DynamoDBLocal server at " + ddbEndpoint;
+ LOG.error(msg, t);
+ throw new IOException(msg, t);
+ }
+ }
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java
new file mode 100644
index 00000000000..c6838a08c74
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.amazonaws.services.dynamodbv2.document.DynamoDB;
+import com.amazonaws.services.dynamodbv2.document.Table;
+import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
+import org.junit.Assume;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.Constants;
+
+import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
+
+/**
+ * Tests concurrent operations on S3Guard.
+ */
+public class ITestS3GuardConcurrentOps extends AbstractS3ATestBase {
+
+ @Rule
+ public final Timeout timeout = new Timeout(5 * 60 * 1000);
+
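+ /**
+ * Assert that the given table does not already exist, so the test starts
+ * from a clean slate rather than reusing a table from an earlier run.
+ */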
+ private void failIfTableExists(DynamoDB db, String tableName) {
+ boolean tableExists = true;
+ try {
+ Table table = db.getTable(tableName);
+ table.describe();
+ } catch (ResourceNotFoundException e) {
+ tableExists = false;
+ }
+ if (tableExists) {
+ fail("Table already exists: " + tableName);
+ }
+ }
+
+ private void deleteTable(DynamoDB db, String tableName) throws
+ InterruptedException {
+ try {
+ Table table = db.getTable(tableName);
+ table.waitForActive();
+ table.delete();
+ table.waitForDelete();
+ } catch (ResourceNotFoundException e) {
+ LOG.warn("Failed to delete {}, as it was not found", tableName, e);
+ }
+ }
+
+ @Test
+ public void testConcurrentTableCreations() throws Exception {
+ final Configuration conf = getConfiguration();
+ Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard",
+ conf.get(Constants.S3_METADATA_STORE_IMPL).equals(
+ Constants.S3GUARD_METASTORE_DYNAMO));
+
+ DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
+ ms.initialize(getFileSystem());
+ DynamoDB db = ms.getDynamoDB();
+
+ String tableName = "testConcurrentTableCreations" + new Random().nextInt();
+ conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, tableName);
+
+ String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
+ if (StringUtils.isEmpty(region)) {
+ // no region set, so pick it up from the test bucket
+ conf.set(S3GUARD_DDB_REGION_KEY, getFileSystem().getBucketLocation());
+ }
+ int concurrentOps = 16;
+ int iterations = 4;
+
+ failIfTableExists(db, tableName);
+
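+ // Each iteration races concurrentOps threads, all initializing a
+ // DynamoDBMetadataStore against the same not-yet-existing table; every
+ // thread must complete without an exception even though the table is
+ // being created concurrently.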
+ for (int i = 0; i < iterations; i++) {
+ ExecutorService executor = Executors.newFixedThreadPool(
+ concurrentOps, new ThreadFactory() {
+ private AtomicInteger count = new AtomicInteger(0);
+
+ public Thread newThread(Runnable r) {
+ return new Thread(r,
+ "testConcurrentTableCreations" + count.getAndIncrement());
+ }
+ });
+ ((ThreadPoolExecutor) executor).prestartAllCoreThreads();
+ Future<Exception>[] futures = new Future[concurrentOps];
+ for (int f = 0; f < concurrentOps; f++) {
+ final int index = f;
+ futures[f] = executor.submit(new Callable<Exception>() {
+ @Override
+ public Exception call() throws Exception {
+
+ ContractTestUtils.NanoTimer timer =
+ new ContractTestUtils.NanoTimer();
+
+ Exception result = null;
+ try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) {
+ store.initialize(conf);
+ } catch (Exception e) {
+ LOG.error(e.getClass() + ": " + e.getMessage());
+ result = e;
+ }
+
+ timer.end("Parallel DynamoDB client creation %d", index);
+ LOG.info("Parallel DynamoDB client creation {} ran from {} to {}",
+ index, timer.getStartTime(), timer.getEndTime());
+ return result;
+ }
+ });
+ }
+ List<Exception> exceptions = new ArrayList<>(concurrentOps);
+ for (int f = 0; f < concurrentOps; f++) {
+ Exception outcome = futures[f].get();
+ if (outcome != null) {
+ exceptions.add(outcome);
+ }
+ }
+ deleteTable(db, tableName);
+ int exceptionsThrown = exceptions.size();
+ if (exceptionsThrown > 0) {
+ // at least one exception was thrown. Fail the test & nest the first
+ // exception caught
+ throw new AssertionError(exceptionsThrown + "/" + concurrentOps +
+ " threads threw exceptions while initializing on iteration " + i,
+ exceptions.get(0));
+ }
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
new file mode 100644
index 00000000000..c13dfc4d495
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.Callable;
+
+import com.amazonaws.services.dynamodbv2.document.DynamoDB;
+import com.amazonaws.services.dynamodbv2.document.Table;
+import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
+import org.junit.Test;
+
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+/**
+ * Test S3Guard related CLI commands against DynamoDB.
+ */
+public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
+
+ @Override
+ protected MetadataStore newMetadataStore() {
+ return new DynamoDBMetadataStore();
+ }
+
+ // Check the existence of a given DynamoDB table.
+ private static boolean exist(DynamoDB dynamoDB, String tableName) {
+ assertNotNull(dynamoDB);
+ assertNotNull(tableName);
+ assertFalse("empty table name", tableName.isEmpty());
+ try {
+ Table table = dynamoDB.getTable(tableName);
+ table.describe();
+ } catch (ResourceNotFoundException e) {
+ return false;
+ }
+ return true;
+ }
+
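+ /**
+ * Init with an invalid region is expected to fail with an IOException;
+ * if it does not, a table may have been created that is never cleaned up.
+ */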
+ @Test
+ public void testInvalidRegion() throws Exception {
+ final String testTableName = "testInvalidRegion" + new Random().nextInt();
+ final String testRegion = "invalidRegion";
+ // Initialize MetadataStore
+ final Init initCmd = new Init(getFileSystem().getConf());
+ LambdaTestUtils.intercept(IOException.class,
+ new Callable<String>() {
+ @Override
+ public String call() throws Exception {
+ int res = initCmd.run(new String[]{
+ "init",
+ "-region", testRegion,
+ "-meta", "dynamodb://" + testTableName
+ });
+ return "Use of invalid region did not fail, returning " + res
+ + "- table may have been " +
+ "created and not cleaned up: " + testTableName;
+ }
+ });
+ }
+
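+ /**
+ * Full lifecycle test: init creates the DynamoDB table, destroy removes
+ * it, and a second destroy against the now-missing table still succeeds.
+ */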
+ @Test
+ public void testDynamoDBInitDestroyCycle() throws Exception {
+ String testTableName = "testDynamoDBInitDestroy" + new Random().nextInt();
+ String testS3Url = path(testTableName).toString();
+ S3AFileSystem fs = getFileSystem();
+ DynamoDB db = null;
+ try {
+ // Initialize MetadataStore
+ Init initCmd = new Init(fs.getConf());
+ expectSuccess("Init command did not exit successfully - see output",
+ initCmd,
+ "init", "-meta", "dynamodb://" + testTableName, testS3Url);
+ // Verify it exists
+ MetadataStore ms = getMetadataStore();
+ assertTrue("metadata store should be DynamoDBMetadataStore",
+ ms instanceof DynamoDBMetadataStore);
+ DynamoDBMetadataStore dynamoMs = (DynamoDBMetadataStore) ms;
+ db = dynamoMs.getDynamoDB();
+ assertTrue(String.format("%s does not exist", testTableName),
+ exist(db, testTableName));
+
+ // Destroy MetadataStore
+ Destroy destroyCmd = new Destroy(fs.getConf());
+
+ expectSuccess("Destroy command did not exit successfully - see output",
+ destroyCmd,
+ "destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
+ // Verify it does not exist
+ assertFalse(String.format("%s still exists", testTableName),
+ exist(db, testTableName));
+
+ // delete again and expect success again
+ expectSuccess("Destroy command did not exit successfully - see output",
+ destroyCmd,
+ "destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
+ } catch (ResourceNotFoundException e) {
+ throw new AssertionError(
+ String.format("DynamoDB table %s does not exist", testTableName),
+ e);
+ } finally {
+ LOG.warn("Table may have not been cleaned up: " +
+ testTableName);
+ if (db != null) {
+ Table table = db.getTable(testTableName);
+ if (table != null) {
+ try {
+ table.delete();
+ table.waitForDelete();
+ } catch (ResourceNotFoundException e) { /* Ignore */ }
+ }
+ }
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java
new file mode 100644
index 00000000000..181cdfb275c
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Diff;
+
+import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
+
+/**
+ * Test S3Guard related CLI commands against a LocalMetadataStore.
+ */
+public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
+
+ @Override
+ protected MetadataStore newMetadataStore() {
+ return new LocalMetadataStore();
+ }
+
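+ /**
+ * Create a directory tree on S3 only (a subdirectory with ten files plus
+ * an empty directory), run the import command, and verify that the
+ * metadata store now lists the same entries.
+ */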
+ @Test
+ public void testImportCommand() throws Exception {
+ S3AFileSystem fs = getFileSystem();
+ MetadataStore ms = getMetadataStore();
+ Path parent = path("test-import");
+ fs.mkdirs(parent);
+ Path dir = new Path(parent, "a");
+ fs.mkdirs(dir);
+ Path emptyDir = new Path(parent, "emptyDir");
+ fs.mkdirs(emptyDir);
+ for (int i = 0; i < 10; i++) {
+ String child = String.format("file-%d", i);
+ try (FSDataOutputStream out = fs.create(new Path(dir, child))) {
+ out.write(1);
+ }
+ }
+
+ S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
+ cmd.setStore(ms);
+
+ expectSuccess("Import command did not exit successfully - see output",
+ cmd,
+ "import", parent.toString());
+
+ DirListingMetadata children =
+ ms.listChildren(dir);
+ assertEquals("Unexpected number of paths imported", 10, children
+ .getListing().size());
+ assertEquals("Expected 2 items: empty directory and a parent directory", 2,
+ ms.listChildren(parent).getListing().size());
+ // assertTrue(children.isAuthoritative());
+ }
+
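+ /**
+ * Create some paths only on S3 and others only in the metadata store,
+ * run the diff command, and check that every path is reported exactly
+ * once under the correct prefix.
+ */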
+ @Test
+ public void testDiffCommand() throws IOException {
+ S3AFileSystem fs = getFileSystem();
+ MetadataStore ms = getMetadataStore();
+ Set<Path> filesOnS3 = new HashSet<>(); // files on S3.
+ Set<Path> filesOnMS = new HashSet<>(); // files on metadata store.
+
+ Path testPath = path("test-diff");
+ mkdirs(testPath, true, true);
+
+ Path msOnlyPath = new Path(testPath, "ms_only");
+ mkdirs(msOnlyPath, false, true);
+ filesOnMS.add(msOnlyPath);
+ for (int i = 0; i < 5; i++) {
+ Path file = new Path(msOnlyPath, String.format("file-%d", i));
+ createFile(file, false, true);
+ filesOnMS.add(file);
+ }
+
+ Path s3OnlyPath = new Path(testPath, "s3_only");
+ mkdirs(s3OnlyPath, true, false);
+ filesOnS3.add(s3OnlyPath);
+ for (int i = 0; i < 5; i++) {
+ Path file = new Path(s3OnlyPath, String.format("file-%d", i));
+ createFile(file, true, false);
+ filesOnS3.add(file);
+ }
+
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
+ PrintStream out = new PrintStream(buf);
+ Diff cmd = new Diff(fs.getConf());
+ cmd.setStore(ms);
+ assertEquals("Diff command did not exit successfully - see output", SUCCESS,
+ cmd.run(new String[]{"diff", "-meta", "local://metadata",
+ testPath.toString()}, out));
+ out.close();
+
+ Set<Path> actualOnS3 = new HashSet<>();
+ Set<Path> actualOnMS = new HashSet<>();
+ boolean duplicates = false;
+ try (BufferedReader reader =
+ new BufferedReader(new InputStreamReader(
+ new ByteArrayInputStream(buf.toByteArray())))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ String[] fields = line.split("\\s");
+ assertEquals("[" + line + "] does not have enough fields",
+ 4, fields.length);
+ String where = fields[0];
+ Path path = new Path(fields[3]);
+ if (Diff.S3_PREFIX.equals(where)) {
+ duplicates = duplicates || actualOnS3.contains(path);
+ actualOnS3.add(path);
+ } else if (Diff.MS_PREFIX.equals(where)) {
+ duplicates = duplicates || actualOnMS.contains(path);
+ actualOnMS.add(path);
+ } else {
+ fail("Unknown prefix: " + where);
+ }
+ }
+ }
+ String actualOut = out.toString();
+ assertEquals("Mismatched metadata store outputs: " + actualOut,
+ filesOnMS, actualOnMS);
+ assertEquals("Mismatched s3 outputs: " + actualOut, filesOnS3, actualOnS3);
+ assertFalse("Diff contained duplicates", duplicates);
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
new file mode 100644
index 00000000000..c19ae9184e7
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
@@ -0,0 +1,887 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import org.apache.hadoop.fs.s3a.Tristate;
+import org.apache.hadoop.io.IOUtils;
+
+/**
+ * Main test class for MetadataStore implementations.
+ * Implementations should each create a test by subclassing this and
+ * overriding {@link #createContract()}.
+ * If your implementation may return missing results for recently set paths,
+ * override {@link MetadataStoreTestBase#allowMissing()}.
+ */
+public abstract class MetadataStoreTestBase extends Assert {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(MetadataStoreTestBase.class);
+
+ /** Some dummy values for sanity-checking FileStatus contents. */
+ static final long BLOCK_SIZE = 32 * 1024 * 1024;
+ static final int REPLICATION = 1;
+ static final FsPermission PERMISSION = new FsPermission((short)0755);
+ static final String OWNER = "bob";
+ static final String GROUP = "uncles";
+ private final long accessTime = System.currentTimeMillis();
+ private final long modTime = accessTime - 5000;
+
+ /**
+ * Each test should override this. Will use a new Configuration instance.
+ * @return Contract which specifies the MetadataStore under test plus config.
+ */
+ public abstract AbstractMSContract createContract() throws IOException;
+
+ /**
+ * Each test should override this.
+ * @param conf Base configuration instance to use.
+ * @return Contract which specifies the MetadataStore under test plus config.
+ */
+ public abstract AbstractMSContract createContract(Configuration conf)
+ throws IOException;
+
+ /**
+ * Tests assume that implementations will return recently set results. If
+ * your implementation does not always hold onto metadata (e.g. LRU or
+ * time-based expiry) you can override this to return true.
+ * @return true if the test should succeed when null results are returned
+ * from the MetadataStore under test.
+ */
+ public boolean allowMissing() {
+ return false;
+ }
+
+ /**
+ * Pruning is an optional feature for metadata store implementations.
+ * Tests will only check that functionality if it is expected to work.
+ * @return true if the test should expect pruning to work.
+ */
+ public boolean supportsPruning() {
+ return true;
+ }
+
+ /** The MetadataStore contract used to test against. */
+ private AbstractMSContract contract;
+
+ private MetadataStore ms;
+
+ /**
+ * @return reference to the test contract.
+ */
+ protected AbstractMSContract getContract() {
+ return contract;
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ LOG.debug("== Setup. ==");
+ contract = createContract();
+ ms = contract.getMetadataStore();
+ assertNotNull("null MetadataStore", ms);
+ assertNotNull("null FileSystem", contract.getFileSystem());
+ ms.initialize(contract.getFileSystem());
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ LOG.debug("== Tear down. ==");
+ if (ms != null) {
+ try {
+ ms.destroy();
+ } catch (Exception e) {
+ LOG.warn("Failed to destroy tables in teardown", e);
+ }
+ IOUtils.closeStream(ms);
+ ms = null;
+ }
+ }
+
+ /**
+ * Helper function for verifying DescendantsIterator and
+ * MetadataStoreListFilesIterator behavior.
+ * @param createNodes List of paths to create
+ * @param checkNodes List of paths that the iterator should return
+ */
+ private void doTestDescendantsIterator(
+ Class implementation, String[] createNodes,
+ String[] checkNodes) throws Exception {
+ // we set up the example file system tree in metadata store
+ for (String pathStr : createNodes) {
+ final FileStatus status = pathStr.contains("file")
+ ? basicFileStatus(strToPath(pathStr), 100, false)
+ : basicFileStatus(strToPath(pathStr), 0, true);
+ ms.put(new PathMetadata(status));
+ }
+
+ final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
+ RemoteIterator<FileStatus> iterator;
+ if (implementation == DescendantsIterator.class) {
+ iterator = new DescendantsIterator(ms, rootMeta);
+ } else if (implementation == MetadataStoreListFilesIterator.class) {
+ iterator = new MetadataStoreListFilesIterator(ms, rootMeta, false);
+ } else {
+ throw new UnsupportedOperationException("Unrecognized class");
+ }
+
+ final Set<String> actual = new HashSet<>();
+ while (iterator.hasNext()) {
+ final Path p = iterator.next().getPath();
+ actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
+ }
+ LOG.info("We got {} by iterating DescendantsIterator", actual);
+
+ if (!allowMissing()) {
+ assertEquals(Sets.newHashSet(checkNodes), actual);
+ }
+ }
+
+ /**
+ * Test that we can get the whole sub-tree by iterating DescendantsIterator.
+ *
+ * The tree is similar to or same as the example in code comment.
+ */
+ @Test
+ public void testDescendantsIterator() throws Exception {
+ final String[] tree = new String[] {
+ "/dir1",
+ "/dir1/dir2",
+ "/dir1/dir3",
+ "/dir1/dir2/file1",
+ "/dir1/dir2/file2",
+ "/dir1/dir3/dir4",
+ "/dir1/dir3/dir5",
+ "/dir1/dir3/dir4/file3",
+ "/dir1/dir3/dir5/file4",
+ "/dir1/dir3/dir6"
+ };
+ doTestDescendantsIterator(DescendantsIterator.class,
+ tree, tree);
+ }
+
+ /**
+ * Test that we can get the correct subset of the tree with
+ * MetadataStoreListFilesIterator.
+ *
+ * The tree is similar to or same as the example in code comment.
+ */
+ @Test
+ public void testMetadataStoreListFilesIterator() throws Exception {
+ final String[] wholeTree = new String[] {
+ "/dir1",
+ "/dir1/dir2",
+ "/dir1/dir3",
+ "/dir1/dir2/file1",
+ "/dir1/dir2/file2",
+ "/dir1/dir3/dir4",
+ "/dir1/dir3/dir5",
+ "/dir1/dir3/dir4/file3",
+ "/dir1/dir3/dir5/file4",
+ "/dir1/dir3/dir6"
+ };
+ final String[] leafNodes = new String[] {
+ "/dir1/dir2/file1",
+ "/dir1/dir2/file2",
+ "/dir1/dir3/dir4/file3",
+ "/dir1/dir3/dir5/file4"
+ };
+ doTestDescendantsIterator(MetadataStoreListFilesIterator.class, wholeTree,
+ leafNodes);
+ }
+
+ @Test
+ public void testPutNew() throws Exception {
+ /* create three dirs /da1, /da2, /da3 */
+ createNewDirs("/da1", "/da2", "/da3");
+
+ /* It is caller's responsibility to set up ancestor entries beyond the
+ * containing directory. We only track direct children of the directory.
+ * Thus this will not affect entry for /da1.
+ */
+ ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
+
+ assertEmptyDirs("/da2", "/da3");
+ assertDirectorySize("/da1/db1", 1);
+
+ /* Check contents of dir status. */
+ PathMetadata dirMeta = ms.get(strToPath("/da1"));
+ if (!allowMissing() || dirMeta != null) {
+ verifyDirStatus(dirMeta.getFileStatus());
+ }
+
+ /* This already exists, and should silently replace it. */
+ ms.put(new PathMetadata(makeDirStatus("/da1/db1")));
+
+ /* If we had putNew(), and used it above, this would be empty again. */
+ assertDirectorySize("/da1", 1);
+
+ assertEmptyDirs("/da2", "/da3");
+
+ /* Ensure new files update correct parent dirs. */
+ ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
+ ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc2", 200)));
+ assertDirectorySize("/da1", 1);
+ assertDirectorySize("/da1/db1", 2);
+ assertEmptyDirs("/da2", "/da3");
+ PathMetadata meta = ms.get(strToPath("/da1/db1/fc2"));
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get file after put new.", meta);
+ verifyFileStatus(meta.getFileStatus(), 200);
+ }
+ }
+
+ @Test
+ public void testPutOverwrite() throws Exception {
+ final String filePath = "/a1/b1/c1/some_file";
+ final String dirPath = "/a1/b1/c1/d1";
+ ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
+ ms.put(new PathMetadata(makeDirStatus(dirPath)));
+ PathMetadata meta = ms.get(strToPath(filePath));
+ if (!allowMissing() || meta != null) {
+ verifyFileStatus(meta.getFileStatus(), 100);
+ }
+
+ ms.put(new PathMetadata(basicFileStatus(strToPath(filePath), 9999, false)));
+ meta = ms.get(strToPath(filePath));
+ if (!allowMissing() || meta != null) {
+ verifyFileStatus(meta.getFileStatus(), 9999);
+ }
+ }
+
+ @Test
+ public void testRootDirPutNew() throws Exception {
+ Path rootPath = strToPath("/");
+
+ ms.put(new PathMetadata(makeFileStatus("/file1", 100)));
+ DirListingMetadata dir = ms.listChildren(rootPath);
+ if (!allowMissing() || dir != null) {
+ assertNotNull("Root dir cached", dir);
+ assertFalse("Root not fully cached", dir.isAuthoritative());
+ assertNotNull("have root dir file listing", dir.getListing());
+ assertEquals("One file in root dir", 1, dir.getListing().size());
+ assertEquals("file1 in root dir", strToPath("/file1"),
+ dir.getListing().iterator().next().getFileStatus().getPath());
+ }
+ }
+
+ @Test
+ public void testDelete() throws Exception {
+ setUpDeleteTest();
+
+ ms.delete(strToPath("/ADirectory1/db1/file2"));
+
+ /* Ensure delete happened. */
+ assertDirectorySize("/ADirectory1/db1", 1);
+ PathMetadata meta = ms.get(strToPath("/ADirectory1/db1/file2"));
+ assertTrue("File deleted", meta == null || meta.isDeleted());
+ }
+
+ @Test
+ public void testDeleteSubtree() throws Exception {
+ deleteSubtreeHelper("");
+ }
+
+ @Test
+ public void testDeleteSubtreeHostPath() throws Exception {
+ deleteSubtreeHelper(contract.getFileSystem().getUri().toString());
+ }
+
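+ /**
+ * Build a small tree under ADirectory1 (optionally prefixed with the
+ * filesystem URI), delete the db1 subtree, and verify that everything
+ * beneath it is gone while sibling directories survive.
+ * @param pathPrefix optional host/scheme prefix for the test paths
+ */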
+ private void deleteSubtreeHelper(String pathPrefix) throws Exception {
+
+ String p = pathPrefix;
+ setUpDeleteTest(p);
+ createNewDirs(p + "/ADirectory1/db1/dc1", p + "/ADirectory1/db1/dc1/dd1");
+ ms.put(new PathMetadata(
+ makeFileStatus(p + "/ADirectory1/db1/dc1/dd1/deepFile", 100)));
+ if (!allowMissing()) {
+ assertCached(p + "/ADirectory1/db1");
+ }
+ ms.deleteSubtree(strToPath(p + "/ADirectory1/db1/"));
+
+ assertEmptyDirectory(p + "/ADirectory1");
+ assertDeleted(p + "/ADirectory1/db1");
+ assertDeleted(p + "/ADirectory1/file1");
+ assertDeleted(p + "/ADirectory1/file2");
+ assertDeleted(p + "/ADirectory1/db1/dc1/dd1/deepFile");
+ assertEmptyDirectory(p + "/ADirectory2");
+ }
+
+
+ /*
+ * Some implementations might not support this. It was useful to test
+ * correctness of the LocalMetadataStore implementation, but feel free to
+ * override this to be a no-op.
+ */
+ @Test
+ public void testDeleteRecursiveRoot() throws Exception {
+ setUpDeleteTest();
+
+ ms.deleteSubtree(strToPath("/"));
+ assertDeleted("/ADirectory1");
+ assertDeleted("/ADirectory2");
+ assertDeleted("/ADirectory2/db1");
+ assertDeleted("/ADirectory2/db1/file1");
+ assertDeleted("/ADirectory2/db1/file2");
+ }
+
+ @Test
+ public void testDeleteNonExisting() throws Exception {
+ // Path doesn't exist, but should silently succeed
+ ms.delete(strToPath("/bobs/your/uncle"));
+
+ // Ditto.
+ ms.deleteSubtree(strToPath("/internets"));
+ }
+
+
+ private void setUpDeleteTest() throws IOException {
+ setUpDeleteTest("");
+ }
+
+ private void setUpDeleteTest(String prefix) throws IOException {
+ createNewDirs(prefix + "/ADirectory1", prefix + "/ADirectory2",
+ prefix + "/ADirectory1/db1");
+ ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file1",
+ 100)));
+ ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file2",
+ 100)));
+
+ PathMetadata meta = ms.get(strToPath(prefix + "/ADirectory1/db1/file2"));
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Found test file", meta);
+ assertDirectorySize(prefix + "/ADirectory1/db1", 2);
+ }
+ }
+
+ @Test
+ public void testGet() throws Exception {
+ final String filePath = "/a1/b1/c1/some_file";
+ final String dirPath = "/a1/b1/c1/d1";
+ ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
+ ms.put(new PathMetadata(makeDirStatus(dirPath)));
+ PathMetadata meta = ms.get(strToPath(filePath));
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get found file", meta);
+ verifyFileStatus(meta.getFileStatus(), 100);
+ }
+
+ if (!(ms instanceof NullMetadataStore)) {
+ ms.delete(strToPath(filePath));
+ meta = ms.get(strToPath(filePath));
+ assertTrue("Tombstone not left for deleted file", meta.isDeleted());
+ }
+
+ meta = ms.get(strToPath(dirPath));
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get found file (dir)", meta);
+ assertTrue("Found dir", meta.getFileStatus().isDirectory());
+ }
+
+ meta = ms.get(strToPath("/bollocks"));
+ assertNull("Don't get non-existent file", meta);
+ }
+
+ @Test
+ public void testGetEmptyDir() throws Exception {
+ final String dirPath = "/a1/b1/c1/d1";
+ // Creates /a1/b1/c1/d1 as an empty dir
+ setupListStatus();
+
+ // 1. Tell MetadataStore (MS) that there are zero children
+ putListStatusFiles(dirPath, true /* authoritative */
+ /* zero children */);
+
+ // 2. Request a file status for dir, including whether or not the dir
+ // is empty.
+ PathMetadata meta = ms.get(strToPath(dirPath), true);
+
+ // 3. Check that either (a) the MS doesn't track whether or not it is
+ // empty (which is allowed), or (b) the MS knows the dir is empty.
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get should find meta for dir", meta);
+ assertNotEquals("Dir is empty or unknown", Tristate.FALSE,
+ meta.isEmptyDirectory());
+ }
+ }
+
+ @Test
+ public void testGetNonEmptyDir() throws Exception {
+ final String dirPath = "/a1/b1/c1";
+ // Creates /a1/b1/c1 as a non-empty dir
+ setupListStatus();
+
+ // Request a file status for dir, including whether or not the dir
+ // is empty.
+ PathMetadata meta = ms.get(strToPath(dirPath), true);
+
+ // MetadataStore knows /a1/b1/c1 has at least one child. It is valid
+ // for it to answer either (a) UNKNOWN: the MS doesn't track whether
+ // or not the dir is empty, or (b) the MS knows the dir is non-empty.
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get should find meta for dir", meta);
+ assertNotEquals("Dir is non-empty or unknown", Tristate.TRUE,
+ meta.isEmptyDirectory());
+ }
+ }
+
+ @Test
+ public void testGetDirUnknownIfEmpty() throws Exception {
+ final String dirPath = "/a1/b1/c1/d1";
+ // 1. Create /a1/b1/c1/d1 as an empty dir, but do not tell MetadataStore
+ // (MS) whether or not it has any children.
+ setupListStatus();
+
+ // 2. Request a file status for dir, including whether or not the dir
+ // is empty.
+ PathMetadata meta = ms.get(strToPath(dirPath), true);
+
+ // 3. Assert MS reports isEmptyDir as UNKNOWN: We haven't told MS
+ // whether or not the directory has any children.
+ if (!allowMissing() || meta != null) {
+ assertNotNull("Get should find meta for dir", meta);
+ assertEquals("Dir empty is unknown", Tristate.UNKNOWN,
+ meta.isEmptyDirectory());
+ }
+ }
+
+ @Test
+ public void testListChildren() throws Exception {
+ setupListStatus();
+
+ DirListingMetadata dirMeta;
+ dirMeta = ms.listChildren(strToPath("/"));
+ if (!allowMissing()) {
+ assertNotNull(dirMeta);
+ /* Cache has no way of knowing it has all entries for root unless we
+ * specifically tell it via put() with
+ * DirListingMetadata.isAuthoritative = true */
+ assertFalse("Root dir is not cached, or partially cached",
+ dirMeta.isAuthoritative());
+ assertListingsEqual(dirMeta.getListing(), "/a1", "/a2");
+ }
+
+ dirMeta = ms.listChildren(strToPath("/a1"));
+ if (!allowMissing() || dirMeta != null) {
+ dirMeta = dirMeta.withoutTombstones();
+ assertListingsEqual(dirMeta.getListing(), "/a1/b1", "/a1/b2");
+ }
+
+ // TODO HADOOP-14756 instrument MetadataStore for asserting & testing
+ dirMeta = ms.listChildren(strToPath("/a1/b1"));
+ if (!allowMissing() || dirMeta != null) {
+ assertListingsEqual(dirMeta.getListing(), "/a1/b1/file1", "/a1/b1/file2",
+ "/a1/b1/c1");
+ }
+ }
+
+ @Test
+ public void testDirListingRoot() throws Exception {
+ commonTestPutListStatus("/");
+ }
+
+ @Test
+ public void testPutDirListing() throws Exception {
+ commonTestPutListStatus("/a");
+ }
+
+ @Test
+ public void testInvalidListChildren() throws Exception {
+ setupListStatus();
+ assertNull("missing path returns null",
+ ms.listChildren(strToPath("/a1/b1x")));
+ }
+
+ @Test
+ public void testMove() throws Exception {
+ // Create test dir structure
+ createNewDirs("/a1", "/a2", "/a3");
+ createNewDirs("/a1/b1", "/a1/b2");
+ putListStatusFiles("/a1/b1", false, "/a1/b1/file1", "/a1/b1/file2");
+
+ // Assert root listing as expected
+ Collection<PathMetadata> entries;
+ DirListingMetadata dirMeta = ms.listChildren(strToPath("/"));
+ if (!allowMissing() || dirMeta != null) {
+ dirMeta = dirMeta.withoutTombstones();
+ assertNotNull("Listing root", dirMeta);
+ entries = dirMeta.getListing();
+ assertListingsEqual(entries, "/a1", "/a2", "/a3");
+ }
+
+ // Assert src listing as expected
+ dirMeta = ms.listChildren(strToPath("/a1/b1"));
+ if (!allowMissing() || dirMeta != null) {
+ assertNotNull("Listing /a1/b1", dirMeta);
+ entries = dirMeta.getListing();
+ assertListingsEqual(entries, "/a1/b1/file1", "/a1/b1/file2");
+ }
+
+ // Do the move(): rename(/a1/b1, /b1)
+ Collection<Path> srcPaths = Arrays.asList(strToPath("/a1/b1"),
+ strToPath("/a1/b1/file1"), strToPath("/a1/b1/file2"));
+
+ ArrayList<PathMetadata> destMetas = new ArrayList<>();
+ destMetas.add(new PathMetadata(makeDirStatus("/b1")));
+ destMetas.add(new PathMetadata(makeFileStatus("/b1/file1", 100)));
+ destMetas.add(new PathMetadata(makeFileStatus("/b1/file2", 100)));
+ ms.move(srcPaths, destMetas);
+
+ // Assert src is no longer there
+ dirMeta = ms.listChildren(strToPath("/a1"));
+ if (!allowMissing() || dirMeta != null) {
+ assertNotNull("Listing /a1", dirMeta);
+ entries = dirMeta.withoutTombstones().getListing();
+ assertListingsEqual(entries, "/a1/b2");
+ }
+
+ PathMetadata meta = ms.get(strToPath("/a1/b1/file1"));
+ assertTrue("Src path deleted", meta == null || meta.isDeleted());
+
+ // Assert dest looks right
+ meta = ms.get(strToPath("/b1/file1"));
+ if (!allowMissing() || meta != null) {
+ assertNotNull("dest file not null", meta);
+ verifyFileStatus(meta.getFileStatus(), 100);
+ }
+
+ dirMeta = ms.listChildren(strToPath("/b1"));
+ if (!allowMissing() || dirMeta != null) {
+ assertNotNull("dest listing not null", dirMeta);
+ entries = dirMeta.getListing();
+ assertListingsEqual(entries, "/b1/file1", "/b1/file2");
+ }
+ }
+
+ /**
+ * Test that the MetadataStore differentiates between the same path in two
+ * different buckets.
+ */
+ @Test
+ public void testMultiBucketPaths() throws Exception {
+ String p1 = "s3a://bucket-a/path1";
+ String p2 = "s3a://bucket-b/path2";
+
+ // Make sure we start out empty
+ PathMetadata meta = ms.get(new Path(p1));
+ assertNull("Path should not be present yet.", meta);
+ meta = ms.get(new Path(p2));
+ assertNull("Path2 should not be present yet.", meta);
+
+ // Put p1, assert p2 doesn't match
+ ms.put(new PathMetadata(makeFileStatus(p1, 100)));
+ meta = ms.get(new Path(p2));
+ assertNull("Path 2 should not match path 1.", meta);
+
+ // Make sure delete is correct as well
+ if (!allowMissing()) {
+ ms.delete(new Path(p2));
+ meta = ms.get(new Path(p1));
+ assertNotNull("Path should not have been deleted", meta);
+ }
+ ms.delete(new Path(p1));
+ }
+
+ @Test
+ public void testPruneFiles() throws Exception {
+ Assume.assumeTrue(supportsPruning());
+ createNewDirs("/pruneFiles");
+
+ long oldTime = getTime();
+ ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime,
+ oldTime)));
+ DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles"));
+ if (!allowMissing()) {
+ assertListingsEqual(ls2.getListing(), "/pruneFiles/old");
+ }
+
+ // It's possible for the Local implementation to get from /pruneFiles/old's
+ // modification time to here in under 1ms, causing it to not get pruned
+ Thread.sleep(1);
+ long cutoff = System.currentTimeMillis();
+ long newTime = getTime();
+ ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime,
+ newTime)));
+
+ DirListingMetadata ls;
+ ls = ms.listChildren(strToPath("/pruneFiles"));
+ if (!allowMissing()) {
+ assertListingsEqual(ls.getListing(), "/pruneFiles/new",
+ "/pruneFiles/old");
+ }
+ ms.prune(cutoff);
+ ls = ms.listChildren(strToPath("/pruneFiles"));
+ if (allowMissing()) {
+ assertDeleted("/pruneFiles/old");
+ } else {
+ assertListingsEqual(ls.getListing(), "/pruneFiles/new");
+ }
+ }
+
+ @Test
+ public void testPruneDirs() throws Exception {
+ Assume.assumeTrue(supportsPruning());
+
+ // We only test that files, not dirs, are removed during prune.
+ // We specifically allow directories to remain, as it is more robust
+ // for DynamoDBMetadataStore's prune() implementation: If a
+ // file was created in a directory while it was being pruned, it would
+ // violate the invariant that all ancestors of a file exist in the table.
+
+ createNewDirs("/pruneDirs/dir");
+
+ long oldTime = getTime();
+ ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file",
+ 1, oldTime, oldTime)));
+
+ // It's possible for the Local implementation to get from the old
+ // modification time to here in under 1ms, causing it to not get pruned
+ Thread.sleep(1);
+ long cutoff = getTime();
+
+ ms.prune(cutoff);
+
+ assertDeleted("/pruneDirs/dir/file");
+ }
+
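+ /**
+ * Pruning removes stale entries; any directory that loses a child this
+ * way can no longer be trusted as a complete listing, so its
+ * authoritative flag must be cleared.
+ */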
+ @Test
+ public void testPruneUnsetsAuthoritative() throws Exception {
+ String rootDir = "/unpruned-root-dir";
+ String grandparentDir = rootDir + "/pruned-grandparent-dir";
+ String parentDir = grandparentDir + "/pruned-parent-dir";
+ String staleFile = parentDir + "/stale-file";
+ String freshFile = rootDir + "/fresh-file";
+ String[] directories = {rootDir, grandparentDir, parentDir};
+
+ createNewDirs(rootDir, grandparentDir, parentDir);
+ long time = System.currentTimeMillis();
+ ms.put(new PathMetadata(
+ new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)),
+ Tristate.FALSE, false));
+ ms.put(new PathMetadata(
+ new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)),
+ Tristate.FALSE, false));
+
+ ms.prune(time);
+ DirListingMetadata listing;
+ for (String directory : directories) {
+ Path path = strToPath(directory);
+ if (ms.get(path) != null) {
+ listing = ms.listChildren(path);
+ assertFalse(listing.isAuthoritative());
+ }
+ }
+ }
+
+ /*
+ * Helper functions.
+ */
+
+ /** Modifies paths input array and returns it. */
+ private String[] buildPathStrings(String parent, String... paths)
+ throws IOException {
+ for (int i = 0; i < paths.length; i++) {
+ Path p = new Path(strToPath(parent), paths[i]);
+ paths[i] = p.toString();
+ }
+ return paths;
+ }
+
+ private void commonTestPutListStatus(final String parent) throws IOException {
+ putListStatusFiles(parent, true, buildPathStrings(parent, "file1", "file2",
+ "file3"));
+ DirListingMetadata dirMeta = ms.listChildren(strToPath(parent));
+ if (!allowMissing() || dirMeta != null) {
+ dirMeta = dirMeta.withoutTombstones();
+ assertNotNull("list after putListStatus", dirMeta);
+ Collection<PathMetadata> entries = dirMeta.getListing();
+ assertNotNull("listStatus has entries", entries);
+ assertListingsEqual(entries,
+ buildPathStrings(parent, "file1", "file2", "file3"));
+ }
+ }
+
+ private void setupListStatus() throws IOException {
+ createNewDirs("/a1", "/a2", "/a1/b1", "/a1/b2", "/a1/b1/c1",
+ "/a1/b1/c1/d1");
+ ms.put(new PathMetadata(makeFileStatus("/a1/b1/file1", 100)));
+ ms.put(new PathMetadata(makeFileStatus("/a1/b1/file2", 100)));
+ }
+
+ private void assertListingsEqual(Collection<PathMetadata> listing,
+ String ...pathStrs) throws IOException {
+ Set<Path> a = new HashSet<>();
+ for (PathMetadata meta : listing) {
+ a.add(meta.getFileStatus().getPath());
+ }
+
+ Set<Path> b = new HashSet<>();
+ for (String ps : pathStrs) {
+ b.add(strToPath(ps));
+ }
+ assertEquals("Same set of files", b, a);
+ }
+
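+ /**
+ * Record a directory listing in the store: create a file entry for each
+ * filename and publish the listing with the given authoritative flag.
+ */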
+ private void putListStatusFiles(String dirPath, boolean authoritative,
+ String... filenames) throws IOException {
+ ArrayList<PathMetadata> metas = new ArrayList<>(filenames.length);
+ for (String filename : filenames) {
+ metas.add(new PathMetadata(makeFileStatus(filename, 100)));
+ }
+ DirListingMetadata dirMeta =
+ new DirListingMetadata(strToPath(dirPath), metas, authoritative);
+ ms.put(dirMeta);
+ }
+
+ private void createNewDirs(String... dirs)
+ throws IOException {
+ for (String pathStr : dirs) {
+ ms.put(new PathMetadata(makeDirStatus(pathStr)));
+ }
+ }
+
+ private void assertDirectorySize(String pathStr, int size)
+ throws IOException {
+ DirListingMetadata dirMeta = ms.listChildren(strToPath(pathStr));
+ if (!allowMissing()) {
+ assertNotNull("Directory " + pathStr + " in cache", dirMeta);
+ }
+ if (!allowMissing() || dirMeta != null) {
+ dirMeta = dirMeta.withoutTombstones();
+ assertEquals("Number of entries in dir " + pathStr, size,
+ nonDeleted(dirMeta.getListing()).size());
+ }
+ }
+
+ /** @return only file statuses which are *not* marked deleted. */
+ private Collection<PathMetadata> nonDeleted(
+ Collection<PathMetadata> statuses) {
+ Collection<PathMetadata> currentStatuses = new ArrayList<>();
+ for (PathMetadata status : statuses) {
+ if (!status.isDeleted()) {
+ currentStatuses.add(status);
+ }
+ }
+ return currentStatuses;
+ }
+
+ private void assertDeleted(String pathStr) throws IOException {
+ Path path = strToPath(pathStr);
+ PathMetadata meta = ms.get(path);
+ boolean cached = meta != null && !meta.isDeleted();
+ assertFalse(pathStr + " should not be cached.", cached);
+ }
+
+ protected void assertCached(String pathStr) throws IOException {
+ Path path = strToPath(pathStr);
+ PathMetadata meta = ms.get(path);
+ boolean cached = meta != null && !meta.isDeleted();
+ assertTrue(pathStr + " should be cached.", cached);
+ }
+
+ /**
+ * Convenience to create a fully qualified Path from string.
+ */
+ Path strToPath(String p) throws IOException {
+ final Path path = new Path(p);
+ assert path.isAbsolute();
+ return path.makeQualified(contract.getFileSystem().getUri(), null);
+ }
+
+ private void assertEmptyDirectory(String pathStr) throws IOException {
+ assertDirectorySize(pathStr, 0);
+ }
+
+ private void assertEmptyDirs(String ...dirs) throws IOException {
+ for (String pathStr : dirs) {
+ assertEmptyDirectory(pathStr);
+ }
+ }
+
+ FileStatus basicFileStatus(Path path, int size, boolean isDir) throws
+ IOException {
+ return basicFileStatus(path, size, isDir, modTime, accessTime);
+ }
+
+ FileStatus basicFileStatus(Path path, int size, boolean isDir,
+ long newModTime, long newAccessTime) throws IOException {
+ return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime,
+ newAccessTime, PERMISSION, OWNER, GROUP, path);
+ }
+
+ private FileStatus makeFileStatus(String pathStr, int size) throws
+ IOException {
+ return makeFileStatus(pathStr, size, modTime, accessTime);
+ }
+
+ private FileStatus makeFileStatus(String pathStr, int size, long newModTime,
+ long newAccessTime) throws IOException {
+ return basicFileStatus(strToPath(pathStr), size, false,
+ newModTime, newAccessTime);
+ }
+
+ void verifyFileStatus(FileStatus status, long size) {
+ S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime);
+ }
+
+ private FileStatus makeDirStatus(String pathStr) throws IOException {
+ return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime);
+ }
+
+ /**
+ * Verify the directory file status. Subclass may verify additional fields.
+ */
+ void verifyDirStatus(FileStatus status) {
+ assertTrue("Is a dir", status.isDirectory());
+ assertEquals("zero length", 0, status.getLen());
+ }
+
+ long getModTime() {
+ return modTime;
+ }
+
+ long getAccessTime() {
+ return accessTime;
+ }
+
+ protected static long getTime() {
+ return System.currentTimeMillis();
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java
new file mode 100644
index 00000000000..8458252af76
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+
+import static org.hamcrest.CoreMatchers.notNullValue;
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests of {@link DirListingMetadata}.
+ */
+public class TestDirListingMetadata {
+
+ private static final String TEST_OWNER = "hadoop";
+
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
+ @Test
+ public void testNullPath() {
+ exception.expect(NullPointerException.class);
+ exception.expectMessage(notNullValue(String.class));
+ new DirListingMetadata(null, null, false);
+ }
+
+ @Test
+ public void testNullListing() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertTrue(meta.getListing().isEmpty());
+ assertFalse(meta.isAuthoritative());
+ }
+
+ @Test
+ public void testEmptyListing() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path,
+ new ArrayList<PathMetadata>(0),
+ false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertTrue(meta.getListing().isEmpty());
+ assertFalse(meta.isAuthoritative());
+ }
+
+ @Test
+ public void testListing() {
+ Path path = new Path("/path");
+ PathMetadata pathMeta1 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
+ PathMetadata pathMeta2 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
+ PathMetadata pathMeta3 = new PathMetadata(
+ new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
+ List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
+ DirListingMetadata meta = new DirListingMetadata(path, listing, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertFalse(meta.getListing().isEmpty());
+ assertTrue(meta.getListing().contains(pathMeta1));
+ assertTrue(meta.getListing().contains(pathMeta2));
+ assertTrue(meta.getListing().contains(pathMeta3));
+ assertFalse(meta.isAuthoritative());
+ }
+
+ @Test
+ public void testListingUnmodifiable() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = makeTwoDirsOneFile(path);
+ assertNotNull(meta.getListing());
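+ // getListing() is expected to return an unmodifiable view, so clear() should throw.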
+ exception.expect(UnsupportedOperationException.class);
+ meta.getListing().clear();
+ }
+
+ @Test
+ public void testAuthoritative() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, true);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertTrue(meta.getListing().isEmpty());
+ assertTrue(meta.isAuthoritative());
+ }
+
+ @Test
+ public void testSetAuthoritative() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertTrue(meta.getListing().isEmpty());
+ assertFalse(meta.isAuthoritative());
+ meta.setAuthoritative(true);
+ assertTrue(meta.isAuthoritative());
+ }
+
+ @Test
+ public void testGet() {
+ Path path = new Path("/path");
+ PathMetadata pathMeta1 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
+ PathMetadata pathMeta2 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
+ PathMetadata pathMeta3 = new PathMetadata(
+ new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
+ List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
+ DirListingMetadata meta = new DirListingMetadata(path, listing, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertFalse(meta.getListing().isEmpty());
+ assertTrue(meta.getListing().contains(pathMeta1));
+ assertTrue(meta.getListing().contains(pathMeta2));
+ assertTrue(meta.getListing().contains(pathMeta3));
+ assertFalse(meta.isAuthoritative());
+ assertEquals(pathMeta1, meta.get(pathMeta1.getFileStatus().getPath()));
+ assertEquals(pathMeta2, meta.get(pathMeta2.getFileStatus().getPath()));
+ assertEquals(pathMeta3, meta.get(pathMeta3.getFileStatus().getPath()));
+ assertNull(meta.get(new Path(path, "notfound")));
+ }
+
+ @Test
+ public void testGetNull() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(NullPointerException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.get(null);
+ }
+
+ @Test
+ public void testGetRoot() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.get(new Path("/"));
+ }
+
+ @Test
+ public void testGetNotChild() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.get(new Path("/different/ancestor"));
+ }
+
+ @Test
+ public void testPut() {
+ Path path = new Path("/path");
+ PathMetadata pathMeta1 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
+ PathMetadata pathMeta2 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
+ PathMetadata pathMeta3 = new PathMetadata(
+ new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
+ List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
+ DirListingMetadata meta = new DirListingMetadata(path, listing, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertFalse(meta.getListing().isEmpty());
+ assertTrue(meta.getListing().contains(pathMeta1));
+ assertTrue(meta.getListing().contains(pathMeta2));
+ assertTrue(meta.getListing().contains(pathMeta3));
+ assertFalse(meta.isAuthoritative());
+ PathMetadata pathMeta4 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir3"), TEST_OWNER));
+ meta.put(pathMeta4.getFileStatus());
+ assertTrue(meta.getListing().contains(pathMeta4));
+ assertEquals(pathMeta4, meta.get(pathMeta4.getFileStatus().getPath()));
+ }
+
+ @Test
+ public void testPutNull() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(NullPointerException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.put(null);
+ }
+
+ @Test
+ public void testPutNullPath() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(NullPointerException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.put(new S3AFileStatus(true, null, TEST_OWNER));
+ }
+
+ @Test
+ public void testPutRoot() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.put(new S3AFileStatus(true, new Path("/"), TEST_OWNER));
+ }
+
+ @Test
+ public void testPutNotChild() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.put(new S3AFileStatus(true, new Path("/different/ancestor"),
+ TEST_OWNER));
+ }
+
+ @Test
+ public void testRemove() {
+ Path path = new Path("/path");
+ PathMetadata pathMeta1 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
+ PathMetadata pathMeta2 = new PathMetadata(
+ new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
+ PathMetadata pathMeta3 = new PathMetadata(
+ new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
+ List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
+ DirListingMetadata meta = new DirListingMetadata(path, listing, false);
+ assertEquals(path, meta.getPath());
+ assertNotNull(meta.getListing());
+ assertFalse(meta.getListing().isEmpty());
+ assertTrue(meta.getListing().contains(pathMeta1));
+ assertTrue(meta.getListing().contains(pathMeta2));
+ assertTrue(meta.getListing().contains(pathMeta3));
+ assertFalse(meta.isAuthoritative());
+ meta.remove(pathMeta1.getFileStatus().getPath());
+ assertFalse(meta.getListing().contains(pathMeta1));
+ assertNull(meta.get(pathMeta1.getFileStatus().getPath()));
+ }
+
+ @Test
+ public void testRemoveNull() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(NullPointerException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.remove(null);
+ }
+
+ @Test
+ public void testRemoveRoot() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.remove(new Path("/"));
+ }
+
+ @Test
+ public void testRemoveNotChild() {
+ Path path = new Path("/path");
+ DirListingMetadata meta = new DirListingMetadata(path, null, false);
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage(notNullValue(String.class));
+ meta.remove(new Path("/different/ancestor"));
+ }
+
+ /*
+ * Create DirListingMetadata with two dirs and one file living in directory
+ * 'parent'
+ */
+ private static DirListingMetadata makeTwoDirsOneFile(Path parent) {
+ PathMetadata pathMeta1 = new PathMetadata(
+ new S3AFileStatus(true, new Path(parent, "dir1"), TEST_OWNER));
+ PathMetadata pathMeta2 = new PathMetadata(
+ new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER));
+ PathMetadata pathMeta3 = new PathMetadata(
+ new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192,
+ TEST_OWNER));
+ List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
+ return new DirListingMetadata(parent, listing, false);
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
new file mode 100644
index 00000000000..02eb7b875a2
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
@@ -0,0 +1,594 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.services.dynamodbv2.document.DynamoDB;
+import com.amazonaws.services.dynamodbv2.document.Item;
+import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
+import com.amazonaws.services.dynamodbv2.document.Table;
+import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
+import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
+import com.amazonaws.services.dynamodbv2.model.TableDescription;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.fs.s3a.Tristate;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.MockS3ClientFactory;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ClientFactory;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
+import static org.apache.hadoop.test.LambdaTestUtils.*;
+
+/**
+ * Test that {@link DynamoDBMetadataStore} implements {@link MetadataStore}.
+ *
+ * In this unit test, we use an in-memory DynamoDBLocal server instead of real
+ * AWS DynamoDB. An {@link S3AFileSystem} object is created and shared for
+ * initializing {@link DynamoDBMetadataStore} objects. There are no real S3
+ * requests issued, as the underlying AWS S3 client is mocked. You won't be
+ * billed for AWS S3 or DynamoDB when you run this test.
+ *
+ * As defined in the base class, every test case gets an independent contract
+ * that creates and initializes a new {@link DynamoDBMetadataStore} instance.
+ * The test contract creates a table for each test and destroys it after the
+ * test case finishes.
+ */
+public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
+ private static final String BUCKET = "TestDynamoDBMetadataStore";
+ private static final String S3URI =
+ URI.create(FS_S3A + "://" + BUCKET + "/").toString();
+ public static final PrimaryKey
+ VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
+ DynamoDBMetadataStore.VERSION_MARKER);
+
+ /** The DynamoDB instance that can issue requests directly to server. */
+ private static DynamoDB dynamoDB;
+
+ @Rule
+ public final Timeout timeout = new Timeout(60 * 1000);
+
+ /**
+ * Start the in-memory DynamoDBLocal server and initialize the S3 file system.
+ */
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ DynamoDBLocalClientFactory.startSingletonServer();
+ try {
+ dynamoDB = new DynamoDBMSContract().getMetadataStore().getDynamoDB();
+ } catch (AmazonServiceException e) {
+ final String msg = "Cannot initialize a DynamoDBMetadataStore instance "
+ + "against the local DynamoDB server. Perhaps the DynamoDBLocal "
+ + "server is not configured correctly. ";
+ LOG.error(msg, e);
+ // fail fast if the DynamoDBLocal server can not work
+ throw e;
+ }
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ if (dynamoDB != null) {
+ dynamoDB.shutdown();
+ }
+ DynamoDBLocalClientFactory.stopSingletonServer();
+ }
+
+ /**
+ * Each contract has its own S3AFileSystem and DynamoDBMetadataStore objects.
+ */
+ private static class DynamoDBMSContract extends AbstractMSContract {
+ private final S3AFileSystem s3afs;
+ private final DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
+
+ DynamoDBMSContract() throws IOException {
+ this(new Configuration());
+ }
+
+ DynamoDBMSContract(Configuration conf) throws IOException {
+ // using mocked S3 clients
+ conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
+ S3ClientFactory.class);
+ conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, S3URI);
+ // setting config for creating a DynamoDBClient against local server
+ conf.set(ACCESS_KEY, "dummy-access-key");
+ conf.set(SECRET_KEY, "dummy-secret-key");
+ conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+ conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
+ DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
+
+ // always create new file system object for a test contract
+ s3afs = (S3AFileSystem) FileSystem.newInstance(conf);
+ ms.initialize(s3afs);
+ }
+
+ @Override
+ public S3AFileSystem getFileSystem() {
+ return s3afs;
+ }
+
+ @Override
+ public DynamoDBMetadataStore getMetadataStore() {
+ return ms;
+ }
+ }
+
+ @Override
+ public DynamoDBMSContract createContract() throws IOException {
+ return new DynamoDBMSContract();
+ }
+
+ @Override
+ public DynamoDBMSContract createContract(Configuration conf) throws
+ IOException {
+ return new DynamoDBMSContract(conf);
+ }
+
+ @Override
+ FileStatus basicFileStatus(Path path, int size, boolean isDir)
+ throws IOException {
+ String owner = UserGroupInformation.getCurrentUser().getShortUserName();
+ return isDir
+ ? new S3AFileStatus(true, path, owner)
+ : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner);
+ }
+
+ private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException {
+ return (DynamoDBMetadataStore) getContract().getMetadataStore();
+ }
+
+ private S3AFileSystem getFileSystem() throws IOException {
+ return (S3AFileSystem) getContract().getFileSystem();
+ }
+
+ /**
+ * This tests that after initialize() with an S3AFileSystem object, the
+ * instance has been initialized successfully and its table is ACTIVE.
+ */
+ @Test
+ public void testInitialize() throws IOException {
+ final String tableName = "testInitializeWithFileSystem";
+ final S3AFileSystem s3afs = getFileSystem();
+ final Configuration conf = s3afs.getConf();
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(s3afs);
+ verifyTableInitialized(tableName);
+ assertNotNull(ddbms.getTable());
+ assertEquals(tableName, ddbms.getTable().getTableName());
+ String expectedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
+ s3afs.getBucketLocation(tableName));
+ assertEquals("DynamoDB table should be in configured region or the same" +
+ " region as S3 bucket",
+ expectedRegion,
+ ddbms.getRegion());
+ }
+ }
+
+ /**
+ * This tests that after initialize() with a Configuration object, the
+ * instance has been initialized successfully and its table is ACTIVE.
+ */
+ @Test
+ public void testInitializeWithConfiguration() throws IOException {
+ final String tableName = "testInitializeWithConfiguration";
+ final Configuration conf = getFileSystem().getConf();
+ conf.unset(S3GUARD_DDB_TABLE_NAME_KEY);
+ String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
+ getFileSystem().getBucketLocation());
+ conf.unset(S3GUARD_DDB_REGION_KEY);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(conf);
+ fail("Should have failed because the table name is not set!");
+ } catch (IllegalArgumentException ignored) {
+ }
+ // config table name
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(conf);
+ fail("Should have failed because as the region is not set!");
+ } catch (IllegalArgumentException ignored) {
+ }
+ // config region
+ conf.set(S3GUARD_DDB_REGION_KEY, savedRegion);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(conf);
+ verifyTableInitialized(tableName);
+ assertNotNull(ddbms.getTable());
+ assertEquals(tableName, ddbms.getTable().getTableName());
+ assertEquals("Unexpected key schema found!",
+ keySchema(),
+ ddbms.getTable().describe().getKeySchema());
+ }
+ }
+
+ /**
+ * Test that for a large batch write request, the limit is handled correctly.
+ */
+ @Test
+ public void testBatchWrite() throws IOException {
+ final int[] numMetasToDeleteOrPut = {
+ -1, // null
+ 0, // empty collection
+ 1, // one path
+ S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT, // exact limit of a batch request
+ S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT + 1 // limit + 1
+ };
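+ // Exercise every combination of old/new listing sizes, including the null (-1) and empty (0) cases.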
+ for (int numOldMetas : numMetasToDeleteOrPut) {
+ for (int numNewMetas : numMetasToDeleteOrPut) {
+ doTestBatchWrite(numOldMetas, numNewMetas);
+ }
+ }
+ }
+
+ private void doTestBatchWrite(int numDelete, int numPut) throws IOException {
+ final String root = S3URI + "/testBatchWrite_" + numDelete + '_' + numPut;
+ final Path oldDir = new Path(root, "oldDir");
+ final Path newDir = new Path(root, "newDir");
+ LOG.info("doTestBatchWrite: oldDir={}, newDir={}", oldDir, newDir);
+
+ DynamoDBMetadataStore ms = getDynamoMetadataStore();
+ ms.put(new PathMetadata(basicFileStatus(oldDir, 0, true)));
+ ms.put(new PathMetadata(basicFileStatus(newDir, 0, true)));
+
+ final List<PathMetadata> oldMetas =
+ numDelete < 0 ? null : new ArrayList<PathMetadata>(numDelete);
+ for (int i = 0; i < numDelete; i++) {
+ oldMetas.add(new PathMetadata(
+ basicFileStatus(new Path(oldDir, "child" + i), i, true)));
+ }
+ final List<PathMetadata> newMetas =
+ numPut < 0 ? null : new ArrayList<PathMetadata>(numPut);
+ for (int i = 0; i < numPut; i++) {
+ newMetas.add(new PathMetadata(
+ basicFileStatus(new Path(newDir, "child" + i), i, false)));
+ }
+
+ Collection<Path> pathsToDelete = null;
+ if (oldMetas != null) {
+ // put all metadata of old paths and verify
+ ms.put(new DirListingMetadata(oldDir, oldMetas, false));
+ assertEquals(0, ms.listChildren(newDir).withoutTombstones().numEntries());
+ assertTrue(CollectionUtils.isEqualCollection(oldMetas,
+ ms.listChildren(oldDir).getListing()));
+
+ pathsToDelete = new ArrayList<>(oldMetas.size());
+ for (PathMetadata meta : oldMetas) {
+ pathsToDelete.add(meta.getFileStatus().getPath());
+ }
+ }
+
+ // move the old paths to new paths and verify
+ ms.move(pathsToDelete, newMetas);
+ assertEquals(0, ms.listChildren(oldDir).withoutTombstones().numEntries());
+ if (newMetas != null) {
+ assertTrue(CollectionUtils.isEqualCollection(newMetas,
+ ms.listChildren(newDir).getListing()));
+ }
+ }
+
+ @Test
+ public void testInitExistingTable() throws IOException {
+ final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
+ final String tableName = ddbms.getTable().getTableName();
+ verifyTableInitialized(tableName);
+ // create existing table
+ ddbms.initTable();
+ verifyTableInitialized(tableName);
+ }
+
+ /**
+ * Test the low level version check code.
+ */
+ @Test
+ public void testItemVersionCompatibility() throws Throwable {
+ verifyVersionCompatibility("table",
+ createVersionMarker(VERSION_MARKER, VERSION, 0));
+ }
+
+ /**
+ * Test that a version marker entry without the version number field
+ * is rejected as incompatible with a meaningful error message.
+ */
+ @Test
+ public void testItemLacksVersion() throws Throwable {
+ intercept(IOException.class, E_NOT_VERSION_MARKER,
+ new VoidCallable() {
+ @Override
+ public void call() throws Exception {
+ verifyVersionCompatibility("table",
+ new Item().withPrimaryKey(
+ createVersionMarkerPrimaryKey(VERSION_MARKER)));
+ }
+ });
+ }
+
+ /**
+ * Delete the version marker and verify that table init fails.
+ */
+ @Test
+ public void testTableVersionRequired() throws Exception {
+ Configuration conf = getFileSystem().getConf();
+ int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES,
+ S3GUARD_DDB_MAX_RETRIES_DEFAULT);
+ conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3);
+
+ final DynamoDBMetadataStore ddbms = createContract(conf).getMetadataStore();
+ String tableName = conf.get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
+ Table table = verifyTableInitialized(tableName);
+ table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
+
+ // create existing table
+ intercept(IOException.class, E_NO_VERSION_MARKER,
+ new VoidCallable() {
+ @Override
+ public void call() throws Exception {
+ ddbms.initTable();
+ }
+ });
+
+ conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries);
+ }
+
+ /**
+ * Set the version value to a different number and verify that
+ * table init fails.
+ */
+ @Test
+ public void testTableVersionMismatch() throws Exception {
+ final DynamoDBMetadataStore ddbms = createContract().getMetadataStore();
+ String tableName = getFileSystem().getConf()
+ .get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
+ Table table = verifyTableInitialized(tableName);
+ table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
+ Item v200 = createVersionMarker(VERSION_MARKER, 200, 0);
+ table.putItem(v200);
+
+ // create existing table
+ intercept(IOException.class, E_INCOMPATIBLE_VERSION,
+ new VoidCallable() {
+ @Override
+ public void call() throws Exception {
+ ddbms.initTable();
+ }
+ });
+ }
+
+ /**
+ * Test that initTable fails with IOException when table does not exist and
+ * table auto-creation is disabled.
+ */
+ @Test
+ public void testFailNonexistentTable() throws IOException {
+ final String tableName = "testFailNonexistentTable";
+ final S3AFileSystem s3afs = getFileSystem();
+ final Configuration conf = s3afs.getConf();
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
+ conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(s3afs);
+ fail("Should have failed as table does not exist and table auto-creation"
+ + " is disabled");
+ } catch (IOException ignored) {
+ }
+ }
+
+ /**
+ * Test cases for the root directory, which is not stored in the DynamoDB table.
+ */
+ @Test
+ public void testRootDirectory() throws IOException {
+ final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
+ Path rootPath = new Path(S3URI);
+ verifyRootDirectory(ddbms.get(rootPath), true);
+
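+ // Adding a child entry under the root means the root should no longer be reported as empty.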
+ ddbms.put(new PathMetadata(new S3AFileStatus(true,
+ new Path(rootPath, "foo"),
+ UserGroupInformation.getCurrentUser().getShortUserName())));
+ verifyRootDirectory(ddbms.get(new Path(S3URI)), false);
+ }
+
+ private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
+ assertNotNull(rootMeta);
+ final FileStatus status = rootMeta.getFileStatus();
+ assertNotNull(status);
+ assertTrue(status.isDirectory());
+ // UNKNOWN is always a valid option, but true / false should not contradict
+ if (isEmpty) {
+ assertNotSame("Should not be marked non-empty",
+ Tristate.FALSE,
+ rootMeta.isEmptyDirectory());
+ } else {
+ assertNotSame("Should not be marked empty",
+ Tristate.TRUE,
+ rootMeta.isEmptyDirectory());
+ }
+ }
+
+ /**
+ * Test that when moving nested paths, all their ancestors up to the
+ * destination root are also created.
+ * Here is the directory tree before move:
+ *
+ * testMovePopulateAncestors
+ * ├── a
+ * │ └── b
+ * │ └── src
+ * │ ├── dir1
+ * │ │ └── dir2
+ * │ └── file1.txt
+ * └── c
+ * └── d
+ * └── dest
+ *
+ * As part of rename(a/b/src, c/d/dest), S3A will enumerate the subtree at
+ * a/b/src. This test verifies that after the move, the new subtree at
+ * 'dest' is reachable from the root (i.e. c/ and c/d exist in the table).
+ * DynamoDBMetadataStore depends on this property to do recursive delete
+ * without a full table scan.
+ */
+ @Test
+ public void testMovePopulatesAncestors() throws IOException {
+ final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
+ final String testRoot = "/testMovePopulatesAncestors";
+ final String srcRoot = testRoot + "/a/b/src";
+ final String destRoot = testRoot + "/c/d/e/dest";
+
+ final Path nestedPath1 = strToPath(srcRoot + "/file1.txt");
+ ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false)));
+ final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2");
+ ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true)));
+
+ // We don't put the destRoot path here, since put() would create ancestor
+ // entries, and we want to ensure that move() does it, instead.
+
+ // Build enumeration of src / dest paths and do the move()
+ final Collection<Path> fullSourcePaths = Lists.newArrayList(
+ strToPath(srcRoot),
+ strToPath(srcRoot + "/dir1"),
+ strToPath(srcRoot + "/dir1/dir2"),
+ strToPath(srcRoot + "/file1.txt")
+ );
+ final Collection<PathMetadata> pathsToCreate = Lists.newArrayList(
+ new PathMetadata(basicFileStatus(strToPath(destRoot),
+ 0, true)),
+ new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"),
+ 0, true)),
+ new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"),
+ 0, true)),
+ new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"),
+ 1024, false))
+ );
+
+ ddbms.move(fullSourcePaths, pathsToCreate);
+
+ // assert that all the ancestors should have been populated automatically
+ assertCached(testRoot + "/c");
+ assertCached(testRoot + "/c/d");
+ assertCached(testRoot + "/c/d/e");
+ assertCached(destRoot /* /c/d/e/dest */);
+
+ // Also check moved files while we're at it
+ assertCached(destRoot + "/dir1");
+ assertCached(destRoot + "/dir1/dir2");
+ assertCached(destRoot + "/file1.txt");
+ }
+
+ @Test
+ public void testProvisionTable() throws IOException {
+ final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
+ final String tableName = ddbms.getTable().getTableName();
+ final ProvisionedThroughputDescription oldProvision =
+ dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
+ ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
+ oldProvision.getWriteCapacityUnits() * 2);
+ final ProvisionedThroughputDescription newProvision =
+ dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
+ LOG.info("Old provision = {}, new provision = {}",
+ oldProvision, newProvision);
+ assertEquals(oldProvision.getReadCapacityUnits() * 2,
+ newProvision.getReadCapacityUnits().longValue());
+ assertEquals(oldProvision.getWriteCapacityUnits() * 2,
+ newProvision.getWriteCapacityUnits().longValue());
+ }
+
+ @Test
+ public void testDeleteTable() throws IOException {
+ final String tableName = "testDeleteTable";
+ final S3AFileSystem s3afs = getFileSystem();
+ final Configuration conf = s3afs.getConf();
+ conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
+ try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+ ddbms.initialize(s3afs);
+ // we can list the empty table
+ ddbms.listChildren(new Path(S3URI));
+
+ ddbms.destroy();
+ verifyTableNotExist(tableName);
+
+ // delete table once more; the ResourceNotFoundException should be swallowed silently
+ ddbms.destroy();
+ verifyTableNotExist(tableName);
+
+ try {
+ // we can no longer list the destroyed table
+ ddbms.listChildren(new Path(S3URI));
+ fail("Should have failed after the table is destroyed!");
+ } catch (IOException ignored) {
+ }
+ }
+ }
+
+ /**
+ * This validates the table is created and ACTIVE in DynamoDB.
+ *
+ * This should not rely on the {@link DynamoDBMetadataStore} implementation.
+ * @return the table.
+ */
+ private static Table verifyTableInitialized(String tableName) {
+ final Table table = dynamoDB.getTable(tableName);
+ final TableDescription td = table.describe();
+ assertEquals(tableName, td.getTableName());
+ assertEquals("ACTIVE", td.getTableStatus());
+ return table;
+ }
+
+ /**
+ * This validates the table is not found in DynamoDB.
+ *
+ * This should not rely on the {@link DynamoDBMetadataStore} implementation.
+ */
+ private static void verifyTableNotExist(String tableName) {
+ final Table table = dynamoDB.getTable(tableName);
+ try {
+ table.describe();
+ fail("Expecting ResourceNotFoundException for table '" + tableName + "'");
+ } catch (ResourceNotFoundException ignored) {
+ }
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
new file mode 100644
index 00000000000..1b765afec2f
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+/**
+ * MetadataStore unit test for {@link LocalMetadataStore}.
+ */
+public class TestLocalMetadataStore extends MetadataStoreTestBase {
+
+ private static final String MAX_ENTRIES_STR = "16";
+
+ private final static class LocalMSContract extends AbstractMSContract {
+
+ private FileSystem fs;
+
+ private LocalMSContract() throws IOException {
+ this(new Configuration());
+ }
+
+ private LocalMSContract(Configuration config) throws IOException {
+ config.set(LocalMetadataStore.CONF_MAX_RECORDS, MAX_ENTRIES_STR);
+ fs = FileSystem.getLocal(config);
+ }
+
+ @Override
+ public FileSystem getFileSystem() {
+ return fs;
+ }
+
+ @Override
+ public MetadataStore getMetadataStore() throws IOException {
+ LocalMetadataStore lms = new LocalMetadataStore();
+ return lms;
+ }
+ }
+
+ @Override
+ public AbstractMSContract createContract() throws IOException {
+ return new LocalMSContract();
+ }
+
+ @Override
+ public AbstractMSContract createContract(Configuration conf) throws
+ IOException {
+ return new LocalMSContract(conf);
+ }
+
+ @Test
+ public void testClearByAncestor() {
+ Map<Path, PathMetadata> map = new HashMap<>();
+
+ // 1. Test paths without scheme/host
+ assertClearResult(map, "", "/", 0);
+ assertClearResult(map, "", "/dirA/dirB", 2);
+ assertClearResult(map, "", "/invalid", 5);
+
+
+ // 2. Test paths w/ scheme/host
+ String p = "s3a://fake-bucket-name";
+ assertClearResult(map, p, "/", 0);
+ assertClearResult(map, p, "/dirA/dirB", 2);
+ assertClearResult(map, p, "/invalid", 5);
+ }
+
+ private static void populateMap(Map<Path, PathMetadata> map,
+ String prefix) {
+ populateEntry(map, new Path(prefix + "/dirA/dirB/"));
+ populateEntry(map, new Path(prefix + "/dirA/dirB/dirC"));
+ populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file1"));
+ populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file2"));
+ populateEntry(map, new Path(prefix + "/dirA/file1"));
+ }
+
+ private static void populateEntry(Map<Path, PathMetadata> map,
+ Path path) {
+ map.put(path, new PathMetadata(new FileStatus(0, true, 0, 0, 0, path)));
+ }
+
+ private static int sizeOfMap(Map<Path, PathMetadata> map) {
+ int count = 0;
+ for (PathMetadata meta : map.values()) {
+ if (!meta.isDeleted()) {
+ count++;
+ }
+ }
+ return count;
+ }
+
+ private static void assertClearResult(Map<Path, PathMetadata> map,
+ String prefixStr, String pathStr, int leftoverSize) {
+ populateMap(map, prefixStr);
+ LocalMetadataStore.deleteHashByAncestor(new Path(prefixStr + pathStr), map,
+ true);
+ assertEquals(String.format("Map should have %d entries", leftoverSize),
+ leftoverSize, sizeOfMap(map));
+ map.clear();
+ }
+
+ @Override
+ protected void verifyFileStatus(FileStatus status, long size) {
+ S3ATestUtils.verifyFileStatus(status, size, REPLICATION, getModTime(),
+ getAccessTime(),
+ BLOCK_SIZE, OWNER, GROUP, PERMISSION);
+ }
+
+ @Override
+ protected void verifyDirStatus(FileStatus status) {
+ S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(),
+ getAccessTime(), OWNER, GROUP, PERMISSION);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestNullMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestNullMetadataStore.java
new file mode 100644
index 00000000000..c0541ea98ee
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestNullMetadataStore.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+
+import java.io.IOException;
+
+/**
+ * Run MetadataStore unit tests on the NullMetadataStore implementation.
+ */
+public class TestNullMetadataStore extends MetadataStoreTestBase {
+ private static class NullMSContract extends AbstractMSContract {
+ @Override
+ public FileSystem getFileSystem() throws IOException {
+ Configuration config = new Configuration();
+ return FileSystem.getLocal(config);
+ }
+
+ @Override
+ public MetadataStore getMetadataStore() throws IOException {
+ return new NullMetadataStore();
+ }
+ }
+
+ /** This MetadataStore always says "I don't know, ask the backing store". */
+ @Override
+ public boolean allowMissing() {
+ return true;
+ }
+
+ @Override
+ public AbstractMSContract createContract() {
+ return new NullMSContract();
+ }
+
+ @Override
+ public AbstractMSContract createContract(Configuration conf) {
+ return createContract();
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java
new file mode 100644
index 00000000000..1678746abd4
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Collection;
+import java.util.concurrent.Callable;
+
+import com.amazonaws.services.dynamodbv2.document.Item;
+import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
+import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
+import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
+import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.google.common.base.Preconditions;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH;
+import static com.amazonaws.services.dynamodbv2.model.KeyType.RANGE;
+import static com.amazonaws.services.dynamodbv2.model.ScalarAttributeType.S;
+import static org.hamcrest.CoreMatchers.anyOf;
+import static org.hamcrest.CoreMatchers.is;
+
+import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION_MARKER;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION;
+
+/**
+ * Test the PathMetadataDynamoDBTranslation is able to translate between domain
+ * model objects and DynamoDB items.
+ */
+public class TestPathMetadataDynamoDBTranslation extends Assert {
+
+ private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
+ private static final Item TEST_DIR_ITEM = new Item();
+ private static PathMetadata testDirPathMetadata;
+
+ private static final long TEST_FILE_LENGTH = 100;
+ private static final long TEST_MOD_TIME = 9999;
+ private static final long TEST_BLOCK_SIZE = 128;
+ private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile");
+ private static final Item TEST_FILE_ITEM = new Item();
+ private static PathMetadata testFilePathMetadata;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws IOException {
+ String username = UserGroupInformation.getCurrentUser().getShortUserName();
+
+ testDirPathMetadata =
+ new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
+ TEST_DIR_ITEM
+ .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
+ .withBoolean(IS_DIR, true);
+
+ testFilePathMetadata = new PathMetadata(
+ new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
+ TEST_BLOCK_SIZE, username));
+ TEST_FILE_ITEM
+ .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
+ CHILD, TEST_FILE_PATH.getName())
+ .withBoolean(IS_DIR, false)
+ .withLong(FILE_LENGTH, TEST_FILE_LENGTH)
+ .withLong(MOD_TIME, TEST_MOD_TIME)
+ .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE);
+ }
+
+ /**
+ * It should not take a long time as it doesn't involve remote server operations.
+ */
+ @Rule
+ public final Timeout timeout = new Timeout(30 * 1000);
+
+ @Test
+ public void testKeySchema() {
+ final Collection<KeySchemaElement> keySchema =
+ PathMetadataDynamoDBTranslation.keySchema();
+ assertNotNull(keySchema);
+ assertEquals("There should be HASH and RANGE key in key schema",
+ 2, keySchema.size());
+ for (KeySchemaElement element : keySchema) {
+ assertThat(element.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
+ assertThat(element.getKeyType(),
+ anyOf(is(HASH.toString()), is(RANGE.toString())));
+ }
+ }
+
+ @Test
+ public void testAttributeDefinitions() {
+ final Collection<AttributeDefinition> attrs =
+ PathMetadataDynamoDBTranslation.attributeDefinitions();
+ assertNotNull(attrs);
+ assertEquals("There should be HASH and RANGE attributes", 2, attrs.size());
+ for (AttributeDefinition definition : attrs) {
+ assertThat(definition.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
+ assertEquals(S.toString(), definition.getAttributeType());
+ }
+ }
+
+ @Test
+ public void testItemToPathMetadata() throws IOException {
+ final String user =
+ UserGroupInformation.getCurrentUser().getShortUserName();
+ assertNull(itemToPathMetadata(null, user));
+
+ verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
+ verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
+ }
+
+ /**
+ * Verify that the Item and PathMetadata objects hold the same information.
+ */
+ private static void verify(Item item, PathMetadata meta) {
+ assertNotNull(meta);
+ final FileStatus status = meta.getFileStatus();
+ final Path path = status.getPath();
+ assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
+ assertEquals(item.get(CHILD), path.getName());
+ boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
+ assertEquals(isDir, status.isDirectory());
+ long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
+ assertEquals(len, status.getLen());
+ long bSize = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
+ assertEquals(bSize, status.getBlockSize());
+
+ /*
+ * S3AFileStatus#getModificationTime() reports the current time, so the
+ * following assertion would fail.
+ *
+ * long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
+ * assertEquals(modTime, status.getModificationTime());
+ */
+ }
+
+ @Test
+ public void testPathMetadataToItem() {
+ verify(pathMetadataToItem(testDirPathMetadata), testDirPathMetadata);
+ verify(pathMetadataToItem(testFilePathMetadata),
+ testFilePathMetadata);
+ }
+
+ @Test
+ public void testPathToParentKeyAttribute() {
+ doTestPathToParentKeyAttribute(TEST_DIR_PATH);
+ doTestPathToParentKeyAttribute(TEST_FILE_PATH);
+ }
+
+ private static void doTestPathToParentKeyAttribute(Path path) {
+ final KeyAttribute attr = pathToParentKeyAttribute(path);
+ assertNotNull(attr);
+ assertEquals(PARENT, attr.getName());
+ // this path is expected as the parent field
+ assertEquals(pathToParentKey(path), attr.getValue());
+ }
+
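+ /**
+ * Compute the expected DynamoDB "parent" key for a path:
+ * "/" + bucket + the URI path, with any trailing slash stripped.
+ */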
+ private static String pathToParentKey(Path p) {
+ Preconditions.checkArgument(p.isUriPathAbsolute());
+ URI parentUri = p.toUri();
+ String bucket = parentUri.getHost();
+ Preconditions.checkNotNull(bucket);
+ String s = "/" + bucket + parentUri.getPath();
+ // strip trailing slash
+ if (s.endsWith("/")) {
+ s = s.substring(0, s.length()-1);
+ }
+ return s;
+ }
+
+ @Test
+ public void testPathToKey() throws Exception {
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ new Callable<PrimaryKey>() {
+ @Override
+ public PrimaryKey call() throws Exception {
+ return pathToKey(new Path("/"));
+ }
+ });
+ doTestPathToKey(TEST_DIR_PATH);
+ doTestPathToKey(TEST_FILE_PATH);
+ }
+
+ private static void doTestPathToKey(Path path) {
+ final PrimaryKey key = pathToKey(path);
+ assertNotNull(key);
+ assertEquals("There should be both HASH and RANGE keys",
+ 2, key.getComponents().size());
+
+ for (KeyAttribute keyAttribute : key.getComponents()) {
+ assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
+ if (PARENT.equals(keyAttribute.getName())) {
+ assertEquals(pathToParentKey(path.getParent()),
+ keyAttribute.getValue());
+ } else {
+ assertEquals(path.getName(), keyAttribute.getValue());
+ }
+ }
+ }
+
+ @Test
+ public void testVersionRoundTrip() throws Throwable {
+ final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
+ assertEquals("Extracted version from " + marker,
+ VERSION, extractVersionFromMarker(marker));
+ }
+
+ @Test
+ public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
+ final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
+ assertNull("Path metadata fromfrom " + marker,
+ itemToPathMetadata(marker, "alice"));
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java
new file mode 100644
index 00000000000..745e7aad288
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Tests for the {@link S3Guard} utility class.
+ */
+public class TestS3Guard extends Assert {
+
+ /**
+ * Basic test to ensure results from S3 and MetadataStore are merged
+ * correctly.
+ */
+ @Test
+ public void testDirListingUnion() throws Exception {
+ MetadataStore ms = new LocalMetadataStore();
+
+ Path dirPath = new Path("s3a://bucket/dir");
+
+ // Two files in metadata store listing
+ PathMetadata m1 = makePathMeta("s3a://bucket/dir/ms-file1", false);
+ PathMetadata m2 = makePathMeta("s3a://bucket/dir/ms-file2", false);
+ DirListingMetadata dirMeta = new DirListingMetadata(dirPath,
+ Arrays.asList(m1, m2), false);
+
+ // Two other files in s3
+ List<FileStatus> s3Listing = Arrays.asList(
+ makeFileStatus("s3a://bucket/dir/s3-file3", false),
+ makeFileStatus("s3a://bucket/dir/s3-file4", false)
+ );
+
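+ // Union the MetadataStore listing with the S3 listing; all four files should appear exactly once.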
+ FileStatus[] result = S3Guard.dirListingUnion(ms, dirPath, s3Listing,
+ dirMeta, false);
+
+ assertEquals("listing length", 4, result.length);
+ assertContainsPath(result, "s3a://bucket/dir/ms-file1");
+ assertContainsPath(result, "s3a://bucket/dir/ms-file2");
+ assertContainsPath(result, "s3a://bucket/dir/s3-file3");
+ assertContainsPath(result, "s3a://bucket/dir/s3-file4");
+ }
+
+ void assertContainsPath(FileStatus[] statuses, String pathStr) {
+ assertTrue("listing doesn't contain " + pathStr,
+ containsPath(statuses, pathStr));
+ }
+
+ boolean containsPath(FileStatus[] statuses, String pathStr) {
+ for (FileStatus s : statuses) {
+ if (s.getPath().toString().equals(pathStr)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private PathMetadata makePathMeta(String pathStr, boolean isDir) {
+ return new PathMetadata(makeFileStatus(pathStr, isDir));
+ }
+
+ private FileStatus makeFileStatus(String pathStr, boolean isDir) {
+ Path p = new Path(pathStr);
+ if (isDir) {
+ return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p);
+ } else {
+ return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p);
+ }
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
new file mode 100644
index 00000000000..876cc8020d3
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer;
+
+/**
+ * Test the performance of a MetadataStore. Useful for load testing.
+ * Could be separated from S3A code, but we're using the S3A scale test
+ * framework for convenience.
+ */
+public abstract class AbstractITestS3AMetadataStoreScale extends
+ S3AScaleTestBase {
+ private static final Logger LOG = LoggerFactory.getLogger(
+ AbstractITestS3AMetadataStoreScale.class);
+
+ /** Some dummy values for FileStatus contents. */
+ static final long BLOCK_SIZE = 32 * 1024 * 1024;
+ static final long SIZE = BLOCK_SIZE * 2;
+ static final String OWNER = "bob";
+ static final long ACCESS_TIME = System.currentTimeMillis();
+
+ static final Path BUCKET_ROOT = new Path("s3a://fake-bucket/");
+
+ /**
+ * Subclasses should override this to provide the MetadataStore they wish
+ * to test.
+ * @return MetadataStore to test against
+ * @throws IOException if the MetadataStore cannot be created
+ */
+ public abstract MetadataStore createMetadataStore() throws IOException;
+
+ @Test
+ public void testPut() throws Throwable {
+ describe("Test workload of put() operations");
+
+ // As described in hadoop-aws site docs, count parameter is used for
+ // width and depth of directory tree
+ int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
+ int depth = width;
+
+ List<PathMetadata> paths = new ArrayList<>();
+ createDirTree(BUCKET_ROOT, depth, width, paths);
+
+ long count = 1; // Some value in case we throw an exception below
+ try (MetadataStore ms = createMetadataStore()) {
+
+ try {
+ count = populateMetadataStore(paths, ms);
+ } finally {
+ clearMetadataStore(ms, count);
+ }
+ }
+ }
+
+ @Test
+ public void testMoves() throws Throwable {
+ describe("Test workload of batched move() operations");
+
+ // As described in hadoop-aws site docs, count parameter is used for
+ // width and depth of directory tree
+ int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
+ int depth = width;
+
+ long operations = getConf().getLong(KEY_OPERATION_COUNT,
+ DEFAULT_OPERATION_COUNT);
+
+ List<PathMetadata> origMetas = new ArrayList<>();
+ createDirTree(BUCKET_ROOT, depth, width, origMetas);
+
+ // Pre-compute source and destination paths for move() loop below
+ List<Path> origPaths = metasToPaths(origMetas);
+ List<PathMetadata> movedMetas = moveMetas(origMetas, BUCKET_ROOT,
+ new Path(BUCKET_ROOT, "moved-here"));
+ List<Path> movedPaths = metasToPaths(movedMetas);
+
+ long count = 1; // Some value in case we throw an exception below
+ try (MetadataStore ms = createMetadataStore()) {
+
+ try {
+ // Setup
+ count = populateMetadataStore(origMetas, ms);
+
+ // Main loop: move things back and forth
+ describe("Running move workload");
+ NanoTimer moveTimer = new NanoTimer();
+ LOG.info("Running {} moves of {} paths each", operations,
+ origMetas.size());
+ for (int i = 0; i < operations; i++) {
+ Collection<Path> toDelete;
+ Collection<PathMetadata> toCreate;
+ if (i % 2 == 0) {
+ toDelete = origPaths;
+ toCreate = movedMetas;
+ } else {
+ toDelete = movedPaths;
+ toCreate = origMetas;
+ }
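+ // Even iterations move the tree under "moved-here"; odd iterations move it back.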
+ ms.move(toDelete, toCreate);
+ }
+ moveTimer.end();
+ printTiming(LOG, "move", moveTimer, operations);
+ } finally {
+ // Cleanup
+ clearMetadataStore(ms, count);
+ }
+ }
+ }
+
+ /**
+ * Create a copy of the given list of PathMetadatas, with the paths moved from
+ * src to dest.
+ */
+ private List<PathMetadata> moveMetas(List<PathMetadata> metas, Path src,
+ Path dest) throws IOException {
+ List<PathMetadata> moved = new ArrayList<>(metas.size());
+ for (PathMetadata srcMeta : metas) {
+ S3AFileStatus status = copyStatus((S3AFileStatus)srcMeta.getFileStatus());
+ status.setPath(movePath(status.getPath(), src, dest));
+ moved.add(new PathMetadata(status));
+ }
+ return moved;
+ }
+
+ private Path movePath(Path p, Path src, Path dest) {
+ String srcStr = src.toUri().getPath();
+ String pathStr = p.toUri().getPath();
+ // Strip off src dir
+ pathStr = pathStr.substring(srcStr.length());
+ // Prepend new dest
+ return new Path(dest, pathStr);
+ }
+
+ private S3AFileStatus copyStatus(S3AFileStatus status) {
+ if (status.isDirectory()) {
+ return new S3AFileStatus(status.isEmptyDirectory(), status.getPath(),
+ status.getOwner());
+ } else {
+ return new S3AFileStatus(status.getLen(), status.getModificationTime(),
+ status.getPath(), status.getBlockSize(), status.getOwner());
+ }
+ }
+
+ /** @return number of PathMetadatas put() into MetadataStore */
+ private long populateMetadataStore(Collection<PathMetadata> paths,
+ MetadataStore ms) throws IOException {
+ long count = 0;
+ NanoTimer putTimer = new NanoTimer();
+ describe("Inserting into MetadataStore");
+ for (PathMetadata p : paths) {
+ ms.put(p);
+ count++;
+ }
+ putTimer.end();
+ printTiming(LOG, "put", putTimer, count);
+ return count;
+ }
+
+ private void clearMetadataStore(MetadataStore ms, long count)
+ throws IOException {
+ describe("Recursive deletion");
+ NanoTimer deleteTimer = new NanoTimer();
+ ms.deleteSubtree(BUCKET_ROOT);
+ deleteTimer.end();
+ printTiming(LOG, "delete", deleteTimer, count);
+ }
+
+ private static void printTiming(Logger log, String op, NanoTimer timer,
+ long count) {
+ double msec = (double) timer.duration() / 1.0e6; // NanoTimer.duration() is in nanoseconds
+ double msecPerOp = msec / count;
+ log.info(String.format("Elapsed %.2f msec. %.3f msec / %s (%d ops)", msec,
+ msecPerOp, op, count));
+ }
+
+ private static S3AFileStatus makeFileStatus(Path path) throws IOException {
+ return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER);
+ }
+
+ private static S3AFileStatus makeDirStatus(Path p) throws IOException {
+ return new S3AFileStatus(false, p, OWNER);
+ }
+
+ private List<Path> metasToPaths(List<PathMetadata> metas) {
+ List<Path> paths = new ArrayList<>(metas.size());
+ for (PathMetadata meta : metas) {
+ paths.add(meta.getFileStatus().getPath());
+ }
+ return paths;
+ }
+
+ /**
+ * Recursively create a directory tree.
+ * @param parent Parent dir of the paths to create.
+ * @param depth How many more levels deep past parent to create.
+ * @param width Number of files (and directories, if depth > 0) per directory.
+ * @param paths List to add generated paths to.
+ */
+ private static void createDirTree(Path parent, int depth, int width,
+ Collection<PathMetadata> paths) throws IOException {
+
+ // Create files
+ for (int i = 0; i < width; i++) {
+ Path p = new Path(parent, String.format("file-%d", i));
+ PathMetadata meta = new PathMetadata(makeFileStatus(p));
+ paths.add(meta);
+ }
+
+ if (depth == 0) {
+ return;
+ }
+
+ // Create directories if there is depth remaining
+ for (int i = 0; i < width; i++) {
+ Path dir = new Path(parent, String.format("dir-%d", i));
+ PathMetadata meta = new PathMetadata(makeDirStatus(dir));
+ paths.add(meta);
+ createDirTree(dir, depth-1, width, paths);
+ }
+ }
+}
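
For illustration only (not part of the patch): a rough sketch of how many PathMetadata entries createDirTree() seeds before the move workload runs. Each call adds width file entries plus, while depth is greater than zero, width directory entries that each recurse one level shallower; assuming depth = width = 3, that is 159 entries.

    // Sketch only: mirrors the recursion in createDirTree() to count the
    // PathMetadata entries it generates.
    public class DirTreeEntryCount {
      static long entries(int depth, int width) {
        long count = width;                            // file-N entries at this level
        if (depth > 0) {
          count += width;                              // dir-N entries at this level
          count += width * entries(depth - 1, width);  // entries under each dir-N
        }
        return count;
      }

      public static void main(String[] args) {
        System.out.println(entries(3, 3));  // 159 entries when depth = width = 3
      }
    }
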
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java
index 89fae822d83..8b163cbee63 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java
@@ -25,6 +25,7 @@ import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.event.ProgressListener;
+import org.apache.hadoop.fs.FileStatus;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
@@ -34,11 +35,9 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.Statistic;
@@ -222,7 +221,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assertEquals("active put requests in \n" + fs,
0, gaugeValue(putRequestsActive));
ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
- S3AFileStatus status = fs.getFileStatus(hugefile);
+ FileStatus status = fs.getFileStatus(hugefile);
ContractTestUtils.assertIsFile(hugefile, status);
assertEquals("File size in " + status, filesize, status.getLen());
if (progress != null) {
@@ -324,7 +323,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
String filetype = encrypted ? "encrypted file" : "file";
describe("Positioned reads of %s %s", filetype, hugefile);
S3AFileSystem fs = getFileSystem();
- S3AFileStatus status = fs.getFileStatus(hugefile);
+ FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
int ops = 0;
final int bufferSize = 8192;
@@ -364,7 +363,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assumeHugeFileExists();
describe("Reading %s", hugefile);
S3AFileSystem fs = getFileSystem();
- S3AFileStatus status = fs.getFileStatus(hugefile);
+ FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
long blocks = filesize / uploadBlockSize;
byte[] data = new byte[uploadBlockSize];
@@ -390,7 +389,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assumeHugeFileExists();
describe("renaming %s to %s", hugefile, hugefileRenamed);
S3AFileSystem fs = getFileSystem();
- S3AFileStatus status = fs.getFileStatus(hugefile);
+ FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
fs.delete(hugefileRenamed, false);
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
@@ -401,7 +400,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
toHuman(timer.nanosPerOperation(mb)));
bandwidth(timer, filesize);
logFSState();
- S3AFileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
+ FileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
assertEquals(filesize, destFileStatus.getLen());
// rename back
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestDynamoDBMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestDynamoDBMetadataStoreScale.java
new file mode 100644
index 00000000000..3de19350faf
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestDynamoDBMetadataStoreScale.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+
+import java.io.IOException;
+
+import static org.junit.Assume.*;
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
+/**
+ * Scale test for DynamoDBMetadataStore.
+ */
+public class ITestDynamoDBMetadataStoreScale
+ extends AbstractITestS3AMetadataStoreScale {
+
+ @Override
+ public MetadataStore createMetadataStore() throws IOException {
+ Configuration conf = getFileSystem().getConf();
+ String ddbTable = conf.get(S3GUARD_DDB_TABLE_NAME_KEY);
+ assumeNotNull("DynamoDB table is configured", ddbTable);
+ String ddbRegion = conf.get(S3GUARD_DDB_REGION_KEY);
+ assumeNotNull("DynamoDB region is configured", ddbRegion);
+
+ DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
+ ms.initialize(getFileSystem().getConf());
+ return ms;
+ }
+}
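
For illustration only (not part of the patch): a minimal sketch of the configuration this test assumes. The key constants are the ones referenced above from Constants; the table name and region values are placeholders for whatever the test environment provides.

    import org.apache.hadoop.conf.Configuration;

    import static org.apache.hadoop.fs.s3a.Constants.*;

    /** Sketch only: if either key is unset, createMetadataStore() above skips the test. */
    public class DynamoDBScaleTestConf {
      public static Configuration forScaleTest() {
        Configuration conf = new Configuration();
        conf.set(S3GUARD_DDB_TABLE_NAME_KEY, "my-s3guard-table"); // placeholder table name
        conf.set(S3GUARD_DDB_REGION_KEY, "us-west-2");            // placeholder region
        return conf;
      }
    }
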
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestLocalMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestLocalMetadataStoreScale.java
new file mode 100644
index 00000000000..591fb0e1c0a
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestLocalMetadataStoreScale.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+
+import java.io.IOException;
+
+/**
+ * Scale test for LocalMetadataStore.
+ */
+public class ITestLocalMetadataStoreScale
+ extends AbstractITestS3AMetadataStoreScale {
+ @Override
+ public MetadataStore createMetadataStore() throws IOException {
+ MetadataStore ms = new LocalMetadataStore();
+ ms.initialize(getFileSystem());
+ return ms;
+ }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java
index b4d3862c3cd..e320bb21918 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java
@@ -107,7 +107,7 @@ public class ITestS3AConcurrentOps extends S3AScaleTestBase {
private S3AFileSystem getNormalFileSystem() throws Exception {
S3AFileSystem s3a = new S3AFileSystem();
- Configuration conf = new Configuration();
+ Configuration conf = createScaleConfiguration();
URI rootURI = new URI(conf.get(TEST_FS_S3A_NAME));
s3a.initialize(rootURI, conf);
return s3a;
@@ -115,6 +115,7 @@ public class ITestS3AConcurrentOps extends S3AScaleTestBase {
@After
public void teardown() throws Exception {
+ super.teardown();
if (auxFs != null) {
auxFs.delete(testRoot, true);
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
new file mode 100644
index 00000000000..fd32ba5bb62
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.OutputStream;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
+/**
+ * Tests for create(): performance and/or load testing.
+ */
+public class ITestS3ACreatePerformance extends S3AScaleTestBase {
+ private static final Logger LOG = LoggerFactory.getLogger(
+ ITestS3ACreatePerformance.class);
+
+ private Path basePath;
+ private int basePathDepth;
+ private static final int PATH_DEPTH = 10;
+
+ @Override
+ public void setup() throws Exception {
+ super.setup();
+ basePath = getTestPath();
+ basePathDepth = basePath.depth();
+ }
+
+ /**
+ * Test rate at which we can create deeply-nested files from a single thread.
+ * @throws Exception
+ */
+ @Test
+ public void testDeepSequentialCreate() throws Exception {
+ long numOperations = getOperationCount();
+ S3AFileSystem fs = getFileSystem();
+
+ NanoTimer timer = new NanoTimer();
+ for (int i = 0; i < numOperations; i++) {
+ Path p = getPathIteration(i, PATH_DEPTH);
+ OutputStream out = fs.create(p);
+ out.write(40); // one byte file with some value 40
+ out.close();
+ }
+ timer.end("Time to create %d files of depth %d", getOperationCount(),
+ PATH_DEPTH);
+ LOG.info("Time per create: {} msec",
+ timer.nanosPerOperation(numOperations) / 1.0e6);
+ }
+
+ /** Get a unique path of depth totalDepth for the given test iteration. */
+ private Path getPathIteration(long iter, int totalDepth) throws Exception {
+ assertTrue("Test path too long, increase PATH_DEPTH in test.",
+ totalDepth > basePathDepth);
+
+ int neededDirs = totalDepth - basePathDepth - 1;
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < neededDirs; i++) {
+ sb.append("iter-").append(iter);
+ sb.append("-dir-").append(i);
+ sb.append("/");
+ }
+ sb.append("file").append(iter);
+ return new Path(basePath, sb.toString());
+ }
+}
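
For illustration only (not part of the patch): a worked example of the paths getPathIteration() builds. Assuming a base test path of depth 2, say /test/create, and PATH_DEPTH = 10, each iteration needs 10 - 2 - 1 = 7 intermediate directories, so iteration 3 creates /test/create/iter-3-dir-0/iter-3-dir-1/.../iter-3-dir-6/file3, a path of total depth 10.

    // Sketch only: reproduces the relative path for iteration 3 under the
    // hypothetical depth-2 base path described above.
    public class PathIterationExample {
      public static void main(String[] args) {
        int totalDepth = 10, basePathDepth = 2;
        long iter = 3;
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < totalDepth - basePathDepth - 1; i++) {
          sb.append("iter-").append(iter).append("-dir-").append(i).append("/");
        }
        sb.append("file").append(iter);
        System.out.println(sb);  // iter-3-dir-0/iter-3-dir-1/.../iter-3-dir-6/file3
      }
    }
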
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
index d71364f4fcd..03f1e220b50 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
@@ -113,14 +113,15 @@ public class ITestS3ADirectoryPerformance extends S3AScaleTestBase {
listContinueRequests,
listStatusCalls,
getFileStatusCalls);
- assertEquals(listRequests.toString(), 2, listRequests.diff());
+ if (!fs.hasMetadataStore()) {
+ assertEquals(listRequests.toString(), 2, listRequests.diff());
+ }
reset(metadataRequests,
listRequests,
listContinueRequests,
listStatusCalls,
getFileStatusCalls);
-
} finally {
describe("deletion");
// deletion at the end of the run
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java
index 236ffcdae77..83ab2102bf6 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java
@@ -20,10 +20,10 @@ package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInputPolicy;
import org.apache.hadoop.fs.s3a.S3AInputStream;
@@ -56,7 +56,7 @@ public class ITestS3AInputStreamPerformance extends S3AScaleTestBase {
private S3AFileSystem s3aFS;
private Path testData;
- private S3AFileStatus testDataStatus;
+ private FileStatus testDataStatus;
private FSDataInputStream in;
private S3AInstrumentation.InputStreamStatistics streamStatistics;
public static final int BLOCK_SIZE = 32 * 1024;
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index 0f844b1787e..b2a1aa09851 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -126,7 +126,7 @@ public class S3AScaleTestBase extends AbstractS3ATestBase {
* @return a configuration with which to create FS instances
*/
protected Configuration createScaleConfiguration() {
- return new Configuration();
+ return super.createConfiguration();
}
protected Path getTestPath() {
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
index d424aa42aa2..e8200da9c33 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
+++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
@@ -36,6 +36,25 @@
The endpoint for s3a://landsat-pds URLs
+
+  <property>
+    <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+    <value>${s3guard.null}</value>
+    <description>The read-only landsat-pds repository isn't
+      managed by s3guard</description>
+  </property>
+
+
+  <property>
+    <name>s3guard.null</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+  </property>
+
+  <property>
+    <name>s3guard.dynamo</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+  </property>
+
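
For illustration only (not part of the patch): the same per-bucket override expressed programmatically. The property name and store class are taken verbatim from the XML above; only the idea of setting them from Java code is an illustration.

    import org.apache.hadoop.conf.Configuration;

    /** Sketch only: bind the read-only landsat-pds bucket to the no-op metadata store. */
    public class LandsatNullStoreConf {
      public static Configuration withoutS3GuardForLandsat(Configuration conf) {
        conf.set("fs.s3a.bucket.landsat-pds.metadatastore.impl",
            "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore");
        return conf;
      }
    }
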
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
index 1330ed1aef3..9376ebd6027 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
+++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
@@ -19,5 +19,16 @@ log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
-# for debugging low level S3a operations, uncomment this line
-# log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+# for debugging low level S3a operations, uncomment these lines
+# Log all S3A classes
+#log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+
+# Log S3Guard classes
+#log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+
+# Enable debug logging of AWS DynamoDB client
+#log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
+
+# Log all HTTP requests made; includes S3 interaction. This may
+# include sensitive information such as account IDs in HTTP headers.
+#log4j.logger.com.amazonaws.request=DEBUG