HADOOP-13345 S3Guard: Improved Consistency for S3A.

Contributed by: Chris Nauroth, Aaron Fabbri, Mingliang Liu, Lei (Eddy) Xu,
Sean Mackrory, Steve Loughran and others.
Steve Loughran 2017-09-01 14:13:41 +01:00
parent 7a96033b15
commit 621b43e254
101 changed files with 13061 additions and 534 deletions

View File

@ -174,6 +174,19 @@
<directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-aws/src/main/bin</directory>
<outputDirectory>/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-aws/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

View File

@ -171,6 +171,11 @@
<artifactId>commons-configuration2</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.fs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
@ -132,6 +133,13 @@ public abstract class AbstractFileSystem {
CONSTRUCTOR_CACHE.put(theClass, meth);
}
result = meth.newInstance(uri, conf);
} catch (InvocationTargetException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
} else {
throw new RuntimeException(cause);
}
} catch (Exception e) {
throw new RuntimeException(e);
}

View File

@ -331,6 +331,15 @@ public class FileContext {
return AbstractFileSystem.get(uri, conf);
}
});
} catch (RuntimeException ex) {
// RTEs can wrap other exceptions; if there is an IOException inner,
// throw it directly.
Throwable cause = ex.getCause();
if (cause instanceof IOException) {
throw (IOException) cause;
} else {
throw ex;
}
} catch (InterruptedException ex) {
LOG.error(ex.toString());
throw new IOException("Failed to get the AbstractFileSystem for path: "

View File

@ -1308,12 +1308,120 @@
</description>
</property>
<property>
<name>fs.s3a.metadatastore.authoritative</name>
<value>false</value>
<description>
When true, allow MetadataStore implementations to act as source of
truth for getting file status and directory listings. Even if this
is set to true, MetadataStore implementations may choose not to
return authoritative results. If the configured MetadataStore does
not support being authoritative, this setting will have no effect.
</description>
</property>
<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
<description>
Fully-qualified name of the class that implements the MetadataStore
to be used by s3a. The default class, NullMetadataStore, has no
effect: s3a will continue to treat the backing S3 service as the one
and only source of truth for file and directory metadata.
</description>
</property>
<property>
<name>fs.s3a.s3guard.cli.prune.age</name>
<value>86400000</value>
<description>
Default age (in milliseconds) after which to prune metadata from the
metadatastore when the prune command is run. Can be overridden on the
command-line.
</description>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
<description>The implementation class of the S3A Filesystem</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.region</name>
<value></value>
<description>
AWS DynamoDB region to connect to. An up-to-date list is
provided in the AWS Documentation: regions and endpoints. Without this
property, S3Guard will operate the table in the region of the associated
S3 bucket.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value></value>
<description>
The DynamoDB table name to operate on. Without this property, the respective
S3 bucket name will be used.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>false</value>
<description>
If true, the S3A client will create the table if it does not already exist.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
<value>500</value>
<description>
Provisioned throughput requirements for read operations in terms of capacity
units for the DynamoDB table. This config value is only used when creating
a new DynamoDB table; for existing tables you can later increase or
decrease the provisioned read capacity manually as needed.
See the DynamoDB documentation for more information.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
<value>100</value>
<description>
Provisioned throughput requirements for write operations in terms of
capacity units for the DynamoDB table. Refer to related config
fs.s3a.s3guard.ddb.table.capacity.read before usage.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.max.retries</name>
<value>9</value>
<description>
Max retries on batched DynamoDB operations before giving up and
throwing an IOException. Each retry is delayed with an exponential
backoff timer which starts at 100 milliseconds and approximately
doubles each time. The minimum total wait before throwing an exception is
sum(100, 200, 400, 800, ..., 100*2^(N-1)) == 100 * ((2^N) - 1) milliseconds.
So N = 9 yields at least 51,100 milliseconds (about 51.1 seconds) of blocking
before throwing an IOException.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.background.sleep</name>
<value>25</value>
<description>
Length (in milliseconds) of pause between each batch of deletes when
pruning metadata. Prevents prune operations (which can typically be low
priority background operations) from overly interfering with other I/O
operations.
</description>
</property>
<property>
<name>fs.AbstractFileSystem.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3A</value>
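
For illustration, the keys above can also be set programmatically; a minimal sketch, assuming the DynamoDB metadata store added by this change, with placeholder table name and region:

// Sketch: enable S3Guard with the DynamoDB metadata store via the new keys.
import org.apache.hadoop.conf.Configuration;

public class S3GuardConfigExample {
  static Configuration withS3Guard() {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore");
    // Listings from the store are only authoritative if this is true.
    conf.setBoolean("fs.s3a.metadatastore.authoritative", false);
    // Create the table on demand; table name and region are placeholders.
    conf.setBoolean("fs.s3a.s3guard.ddb.table.create", true);
    conf.set("fs.s3a.s3guard.ddb.table", "example-s3guard-table");
    conf.set("fs.s3a.s3guard.ddb.region", "us-west-2");
    return conf;
  }
}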

View File

@ -748,13 +748,27 @@ public abstract class FileSystemContractBaseTest {
/**
* This is a sanity check to make sure that any filesystem's handling of
* renames doesn't cause any regressions
* renaming empty dirs doesn't cause any regressions.
*/
public void testRenameEmptyToDirWithSamePrefixAllowed() throws Throwable {
assumeTrue(renameSupported());
Path parentdir = path("testRenameEmptyToDirWithSamePrefixAllowed");
fs.mkdirs(parentdir);
Path dest = path("testRenameEmptyToDirWithSamePrefixAllowedDest");
rename(parentdir, dest, true, false, true);
}
/**
* This is a sanity check to make sure that any filesystem's handling of
* renaming non-empty dirs doesn't cause any regressions.
*/
@Test
public void testRenameToDirWithSamePrefixAllowed() throws Throwable {
assumeTrue(renameSupported());
final Path parentdir = path("testRenameToDirWithSamePrefixAllowed");
fs.mkdirs(parentdir);
// Before renaming, we create one file under the source parent directory
createFile(new Path(parentdir, "mychild"));
final Path dest = path("testRenameToDirWithSamePrefixAllowedDest");
rename(parentdir, dest, true, false, true);
}

View File

@ -222,4 +222,67 @@ public abstract class AbstractContractRenameTest extends
assertPathDoesNotExist("not deleted",
new Path(srcDir, "source.txt"));
}
/**
* Test that after renaming, the nested subdirectory is moved along with all
* its ancestors.
*/
@Test
public void testRenamePopulatesDirectoryAncestors() throws IOException {
final FileSystem fs = getFileSystem();
final Path src = path("testRenamePopulatesDirectoryAncestors/source");
fs.mkdirs(src);
final String nestedDir = "/dir1/dir2/dir3/dir4";
fs.mkdirs(path(src + nestedDir));
Path dst = path("testRenamePopulatesDirectoryAncestorsNew");
fs.rename(src, dst);
validateAncestorsMoved(src, dst, nestedDir);
}
/**
* Test that after renaming, the nested file is moved along with all its
* ancestors. It is similar to {@link #testRenamePopulatesDirectoryAncestors}.
*/
@Test
public void testRenamePopulatesFileAncestors() throws IOException {
final FileSystem fs = getFileSystem();
final Path src = path("testRenamePopulatesFileAncestors/source");
fs.mkdirs(src);
final String nestedFile = "/dir1/dir2/dir3/file4";
byte[] srcDataset = dataset(256, 'a', 'z');
writeDataset(fs, path(src + nestedFile), srcDataset, srcDataset.length,
1024, false);
Path dst = path("testRenamePopulatesFileAncestorsNew");
fs.rename(src, dst);
validateAncestorsMoved(src, dst, nestedFile);
}
/**
* Validate that the nested path and its ancestors have been moved.
*
* @param src the source root to move
* @param dst the destination root to move
* @param nestedPath the nested path to move
*/
private void validateAncestorsMoved(Path src, Path dst, String nestedPath)
throws IOException {
assertIsDirectory(dst);
assertPathDoesNotExist("src path should not exist", path(src + nestedPath));
assertPathExists("dst path should exist", path(dst + nestedPath));
Path path = new Path(nestedPath).getParent();
while (path != null && !path.isRoot()) {
final Path parentSrc = path(src + path.toString());
assertPathDoesNotExist(parentSrc + " is not deleted", parentSrc);
final Path parentDst = path(dst + path.toString());
assertPathExists(parentDst + " should exist after rename", parentDst);
assertIsDirectory(parentDst);
path = path.getParent();
}
}
}

View File

@ -248,6 +248,23 @@ public final class LambdaTestUtils {
throw ex;
}
/**
* Variant of {@link #eventually(int, Callable, Callable)} method for
* void lambda expressions.
* @param timeoutMillis timeout in milliseconds.
* Can be zero, in which case only one attempt is made before failing.
* @param eval expression to evaluate
* @param retry retry interval generator
* @throws Exception the last exception thrown before timeout was triggered
* @throws FailFastException if raised - without any retry attempt.
* @throws InterruptedException if interrupted during the sleep operation.
*/
public static void eventually(int timeoutMillis,
VoidCallable eval,
Callable<Integer> retry) throws Exception {
eventually(timeoutMillis, new VoidCaller(eval), retry);
}
/**
* Simplified {@link #eventually(int, Callable, Callable)} method
* with a fixed interval.
@ -276,6 +293,25 @@ public final class LambdaTestUtils {
new FixedRetryInterval(intervalMillis));
}
/**
* Variant of {@link #eventually(int, int, Callable)} method for
* void lambda expressions.
* @param timeoutMillis timeout in milliseconds.
* Can be zero, in which case only one attempt is made before failing.
* @param intervalMillis interval in milliseconds
* @param eval expression to evaluate
* @throws Exception the last exception thrown before timeout was triggered
* @throws FailFastException if raised - without any retry attempt.
* @throws InterruptedException if interrupted during the sleep operation.
*/
public static void eventually(int timeoutMillis,
int intervalMillis,
VoidCallable eval) throws Exception {
eventually(timeoutMillis, eval,
new FixedRetryInterval(intervalMillis));
}
/**
* Intercept an exception; throw an {@code AssertionError} if one not raised.
* The caught exception is rethrown if it is of the wrong class or
@ -318,6 +354,32 @@ public final class LambdaTestUtils {
}
}
/**
* Variant of {@link #intercept(Class, Callable)} to simplify void
* invocations.
* @param clazz class of exception; the raised exception must be this class
* <i>or a subclass</i>.
* @param eval expression to eval
* @param <E> exception class
* @return the caught exception if it was of the expected type
* @throws Exception any other exception raised
* @throws AssertionError if the evaluation call didn't raise an exception.
*/
public static <E extends Throwable> E intercept(
Class<E> clazz,
VoidCallable eval)
throws Exception {
try {
eval.call();
throw new AssertionError("Expected an exception");
} catch (Throwable e) {
if (clazz.isAssignableFrom(e.getClass())) {
return (E)e;
}
throw e;
}
}
/**
* Intercept an exception; throw an {@code AssertionError} if one not raised.
* The caught exception is rethrown if it is of the wrong class or
@ -358,6 +420,29 @@ public final class LambdaTestUtils {
return ex;
}
/**
* Variant of {@link #intercept(Class, Callable)} to simplify void
* invocations.
* @param clazz class of exception; the raised exception must be this class
* <i>or a subclass</i>.
* @param contained string which must be in the {@code toString()} value
* of the exception
* @param eval expression to eval
* @param <E> exception class
* @return the caught exception if it was of the expected type
* @throws Exception any other exception raised
* @throws AssertionError if the evaluation call didn't raise an exception.
*/
public static <E extends Throwable> E intercept(
Class<E> clazz,
String contained,
VoidCallable eval)
throws Exception {
E ex = intercept(clazz, eval);
GenericTestUtils.assertExceptionContains(contained, ex);
return ex;
}
/**
* Robust string converter for exception messages; if the {@code toString()}
* method throws an exception then that exception is caught and logged,
@ -518,4 +603,31 @@ public final class LambdaTestUtils {
return new FailFastException(String.format(format, args));
}
}
/**
* A simple interface for lambdas, which returns nothing; this exists
* to simplify lambda tests on operations with no return value.
*/
public interface VoidCallable {
void call() throws Exception;
}
/**
* Bridge class to make a {@link VoidCallable} usable by anything that
* takes a {@link Callable}.
*/
public static class VoidCaller implements Callable<Void> {
private final VoidCallable callback;
public VoidCaller(VoidCallable callback) {
this.callback = callback;
}
@Override
public Void call() throws Exception {
callback.call();
return null;
}
}
}
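
For illustration, a sketch of how the void-lambda overloads above might be used in a test; the filesystem, path and timings are placeholders rather than part of this change:

// Sketch: the VoidCallable overloads accept lambdas with no return value.
import java.io.FileNotFoundException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.test.LambdaTestUtils;
import static org.junit.Assert.assertTrue;

public class VoidLambdaUsageExample {
  void example(FileSystem fs, Path path) throws Exception {
    // Expect the probe to fail with FileNotFoundException.
    LambdaTestUtils.intercept(FileNotFoundException.class,
        () -> { fs.getFileStatus(path); });
    // Re-evaluate the assertion every second, for up to 30 seconds.
    LambdaTestUtils.eventually(30_000, 1_000,
        () -> assertTrue(fs.exists(path)));
  }
}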

View File

@ -870,6 +870,17 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-configuration2</artifactId>
<version>2.1</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@ -1734,4 +1745,12 @@
</build>
</profile>
</profiles>
<repositories>
<repository>
<id>dynamodb-local-oregon</id>
<name>DynamoDB Local Release Repository</name>
<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>
</repository>
</repositories>
</project>

View File

@ -26,4 +26,10 @@
<Match>
<Class name="org.apache.hadoop.fs.s3.INode" />
</Match>
<!-- Redundant null check makes code clearer, future-proof here. -->
<Match>
<Class name="org.apache.hadoop.fs.s3a.S3AFileSystem" />
<Method name="s3Exists" />
<Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE" />
</Match>
</FindBugsFilter>

View File

@ -36,6 +36,7 @@
<downloadSources>true</downloadSources>
<hadoop.tmp.dir>${project.build.directory}/test</hadoop.tmp.dir>
<dynamodb.local.version>1.11.86</dynamodb.local.version>
<!-- are scale tests enabled ? -->
<fs.s3a.scale.test.enabled>unset</fs.s3a.scale.test.enabled>
<!-- Size in MB of huge files. -->
@ -44,6 +45,11 @@
<fs.s3a.scale.test.huge.partitionsize>unset</fs.s3a.scale.test.huge.partitionsize>
<!-- Timeout in seconds for scale tests.-->
<fs.s3a.scale.test.timeout>3600</fs.s3a.scale.test.timeout>
<!-- are S3Guard tests enabled ? -->
<fs.s3a.s3guard.test.enabled>false</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.authoritative>false</fs.s3a.s3guard.test.authoritative>
<fs.s3a.s3guard.test.implementation>local</fs.s3a.s3guard.test.implementation>
</properties>
<profiles>
@ -164,6 +170,11 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
</systemPropertyVariables>
<!-- Some tests cannot run in parallel. Tests that cover -->
<!-- access to the root directory must run in isolation -->
@ -205,6 +216,10 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
</systemPropertyVariables>
<!-- Do a sequential run for tests that cannot handle -->
<!-- parallel execution. -->
@ -247,6 +262,10 @@
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- S3Guard -->
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
</systemPropertyVariables>
<forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
</configuration>
@ -269,6 +288,60 @@
<fs.s3a.scale.test.enabled>true</fs.s3a.scale.test.enabled>
</properties>
</profile>
<!-- Turn on S3Guard tests-->
<profile>
<id>s3guard</id>
<activation>
<property>
<name>s3guard</name>
</property>
</activation>
<properties>
<fs.s3a.s3guard.test.enabled>true</fs.s3a.s3guard.test.enabled>
</properties>
</profile>
<!-- Switch to DynamoDB for S3Guard. Has no effect unless S3Guard is enabled -->
<profile>
<id>dynamo</id>
<activation>
<property>
<name>dynamo</name>
</property>
</activation>
<properties>
<fs.s3a.s3guard.test.implementation>dynamo</fs.s3a.s3guard.test.implementation>
</properties>
</profile>
<!-- Switch to DynamoDBLocal for S3Guard. Has no effect unless S3Guard is enabled -->
<profile>
<id>dynamodblocal</id>
<activation>
<property>
<name>dynamodblocal</name>
</property>
</activation>
<properties>
<fs.s3a.s3guard.test.implementation>dynamodblocal</fs.s3a.s3guard.test.implementation>
</properties>
</profile>
<!-- Switch S3Guard from Authoritative=false to true
Has no effect unless S3Guard is enabled -->
<profile>
<id>non-auth</id>
<activation>
<property>
<name>auth</name>
</property>
</activation>
<properties>
<fs.s3a.s3guard.test.authoritative>true</fs.s3a.s3guard.test.authoritative>
</properties>
</profile>
</profiles>
<build>
@ -296,16 +369,48 @@
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>deplist</id>
<id>deplist1</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<!-- build a shellprofile -->
<!-- build a shellprofile for hadoop-aws optional tools -->
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt</outputFile>
</configuration>
</execution>
<execution>
<id>copy</id>
<phase>test-compile</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<includeScope>test</includeScope>
<includeTypes>so,dll,dylib</includeTypes>
<outputDirectory>${project.build.directory}/native-libs</outputDirectory>
</configuration>
</execution>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
</configuration>
</execution>
<execution>
<id>deplist2</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<!-- referenced by the s3guard command -->
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
@ -333,6 +438,26 @@
<artifactId>aws-java-sdk-bundle</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>DynamoDBLocal</artifactId>
<version>${dynamodb.local.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-http</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

View File

@ -267,6 +267,11 @@ public final class Constants {
public static final String USER_AGENT_PREFIX = "fs.s3a.user.agent.prefix";
/** Whether or not to allow MetadataStore to be source of truth. */
public static final String METADATASTORE_AUTHORITATIVE =
"fs.s3a.metadatastore.authoritative";
public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false;
/** read ahead buffer size to prevent connection re-establishments. */
public static final String READAHEAD_RANGE = "fs.s3a.readahead.range";
public static final long DEFAULT_READAHEAD_RANGE = 64 * 1024;
@ -312,7 +317,7 @@ public final class Constants {
@InterfaceStability.Unstable
public static final Class<? extends S3ClientFactory>
DEFAULT_S3_CLIENT_FACTORY_IMPL =
S3ClientFactory.DefaultS3ClientFactory.class;
DefaultS3ClientFactory.class;
/**
* Maximum number of partitions in a multipart upload: {@value}.
@ -320,4 +325,130 @@ public final class Constants {
@InterfaceAudience.Private
public static final int MAX_MULTIPART_COUNT = 10000;
/**
* Classname of the S3A-specific output committer factory. This
* is what must be declared when attempting to use it.
*/
@InterfaceStability.Unstable
public static final String S3A_OUTPUT_COMMITTER_FACTORY =
"org.apache.hadoop.fs.s3a.commit.S3AOutputCommitterFactory";
/* Constants. */
public static final String S3_METADATA_STORE_IMPL =
"fs.s3a.metadatastore.impl";
/** Minimum period of time (in milliseconds) to keep metadata (may only be
* applied when a prune command is manually run).
*/
@InterfaceStability.Unstable
public static final String S3GUARD_CLI_PRUNE_AGE =
"fs.s3a.s3guard.cli.prune.age";
/**
* The region of the DynamoDB service.
*
* This config has no default value. If the user does not set this,
* S3Guard will operate the table in the region of the associated S3 bucket.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_REGION_KEY =
"fs.s3a.s3guard.ddb.region";
/**
* The DynamoDB table name to use.
*
* This config has no default value. If the user does not set this, the
* S3Guard implementation will use the respective S3 bucket name.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_TABLE_NAME_KEY =
"fs.s3a.s3guard.ddb.table";
/**
* Whether to create the DynamoDB table if the table does not exist.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_TABLE_CREATE_KEY =
"fs.s3a.s3guard.ddb.table.create";
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_TABLE_CAPACITY_READ_KEY =
"fs.s3a.s3guard.ddb.table.capacity.read";
public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 500;
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY =
"fs.s3a.s3guard.ddb.table.capacity.write";
public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 100;
/**
* The maximum number of put or delete requests per BatchWriteItem request.
*
* Refer to the Amazon API reference for this limit.
*/
public static final int S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT = 25;
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_MAX_RETRIES =
"fs.s3a.s3guard.ddb.max.retries";
/**
* Max retries on batched DynamoDB operations before giving up and
* throwing an IOException. Default is {@value}. See core-default.xml for
* more detail.
*/
public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = 9;
/**
* Period of time (in milliseconds) to sleep between batches of writes.
* Currently only applies to prune operations, as they are naturally a
* lower priority than other operations.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY =
"fs.s3a.s3guard.ddb.background.sleep";
public static final int S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT = 25;
/**
* V1 committer.
*/
@InterfaceStability.Unstable
public static final String S3A_OUTPUT_COMMITTER_MRV1 =
"org.apache.hadoop.fs.s3a.commit.S3OutputCommitterMRv1";
/**
* The default "Null" metadata store: {@value}.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_METASTORE_NULL
= "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore";
/**
* Use Local memory for the metadata: {@value}.
* This is not coherent across processes and must be used for testing only.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_METASTORE_LOCAL
= "org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore";
/**
* Use DynamoDB for the metadata: {@value}.
*/
@InterfaceStability.Unstable
public static final String S3GUARD_METASTORE_DYNAMO
= "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore";
/**
* Inconsistency (visibility delay) injection settings.
*/
@InterfaceStability.Unstable
public static final String FAIL_INJECT_INCONSISTENCY_KEY =
"fs.s3a.failinject.inconsistency.key.substring";
@InterfaceStability.Unstable
public static final String FAIL_INJECT_INCONSISTENCY_MSEC =
"fs.s3a.failinject.inconsistency.msec";
@InterfaceStability.Unstable
public static final String FAIL_INJECT_INCONSISTENCY_PROBABILITY =
"fs.s3a.failinject.inconsistency.probability";
}
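
For a quick check of the figures quoted for fs.s3a.s3guard.ddb.max.retries, a minimal sketch of the minimum blocking time, assuming an initial 100 ms delay that doubles on each retry (the class and method names are illustrative):

// Sketch: minimum total back-off before an IOException with N retries,
// i.e. delays of 100, 200, 400, ... 100 * 2^(N-1) milliseconds.
public class BackoffMath {
  static long minimumBackoffMillis(int retries) {
    long total = 0;
    long delay = 100;
    for (int i = 0; i < retries; i++) {
      total += delay;
      delay *= 2;
    }
    return total;   // 100 * (2^N - 1); N = 9 gives 51,100 ms, about 51.1 s
  }
}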

View File

@ -0,0 +1,233 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.VersionInfo;
import org.slf4j.Logger;
import java.io.IOException;
import java.net.URI;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
import static org.apache.hadoop.fs.s3a.S3AUtils.intOption;
/**
* The default factory implementation, which calls the AWS SDK to configure
* and create an {@link AmazonS3Client} that communicates with the S3 service.
*/
public class DefaultS3ClientFactory extends Configured implements
S3ClientFactory {
protected static final Logger LOG = S3AFileSystem.LOG;
@Override
public AmazonS3 createS3Client(URI name) throws IOException {
Configuration conf = getConf();
AWSCredentialsProvider credentials =
createAWSCredentialProviderSet(name, conf);
final ClientConfiguration awsConf = createAwsConf(getConf());
AmazonS3 s3 = newAmazonS3Client(credentials, awsConf);
return createAmazonS3Client(s3, conf, credentials, awsConf);
}
/**
* Create a new {@link ClientConfiguration}.
* @param conf The Hadoop configuration
* @return new AWS client configuration
*/
public static ClientConfiguration createAwsConf(Configuration conf) {
final ClientConfiguration awsConf = new ClientConfiguration();
initConnectionSettings(conf, awsConf);
initProxySupport(conf, awsConf);
initUserAgent(conf, awsConf);
return awsConf;
}
/**
* Wrapper around constructor for {@link AmazonS3} client. Override this to
* provide an extended version of the client.
* @param credentials credentials to use
* @param awsConf AWS configuration
* @return new AmazonS3 client
*/
protected AmazonS3 newAmazonS3Client(
AWSCredentialsProvider credentials, ClientConfiguration awsConf) {
return new AmazonS3Client(credentials, awsConf);
}
/**
* Initializes all AWS SDK settings related to connection management.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*/
private static void initConnectionSettings(Configuration conf,
ClientConfiguration awsConf) {
awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
DEFAULT_MAXIMUM_CONNECTIONS, 1));
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
DEFAULT_MAX_ERROR_RETRIES, 0));
awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
DEFAULT_ESTABLISH_TIMEOUT, 0));
awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
DEFAULT_SOCKET_TIMEOUT, 0));
int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
DEFAULT_SOCKET_SEND_BUFFER, 2048);
int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
DEFAULT_SOCKET_RECV_BUFFER, 2048);
awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
if (!signerOverride.isEmpty()) {
LOG.debug("Signer override = {}", signerOverride);
awsConf.setSignerOverride(signerOverride);
}
}
/**
* Initializes AWS SDK proxy support if configured.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
* @throws IllegalArgumentException if misconfigured
*/
private static void initProxySupport(Configuration conf,
ClientConfiguration awsConf) throws IllegalArgumentException {
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
int proxyPort = conf.getInt(PROXY_PORT, -1);
if (!proxyHost.isEmpty()) {
awsConf.setProxyHost(proxyHost);
if (proxyPort >= 0) {
awsConf.setProxyPort(proxyPort);
} else {
if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
awsConf.setProxyPort(443);
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
awsConf.setProxyPort(80);
}
}
String proxyUsername = conf.getTrimmed(PROXY_USERNAME);
String proxyPassword = conf.getTrimmed(PROXY_PASSWORD);
if ((proxyUsername == null) != (proxyPassword == null)) {
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
PROXY_PASSWORD + " set without the other.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
awsConf.setProxyUsername(proxyUsername);
awsConf.setProxyPassword(proxyPassword);
awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
if (LOG.isDebugEnabled()) {
LOG.debug("Using proxy server {}:{} as user {} with password {} on " +
"domain {} as workstation {}", awsConf.getProxyHost(),
awsConf.getProxyPort(),
String.valueOf(awsConf.getProxyUsername()),
awsConf.getProxyPassword(), awsConf.getProxyDomain(),
awsConf.getProxyWorkstation());
}
} else if (proxyPort >= 0) {
String msg =
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
}
/**
* Initializes the User-Agent header to send in HTTP requests to the S3
* back-end. We always include the Hadoop version number. The user also
* may set an optional custom prefix to put in front of the Hadoop version
* number. The AWS SDK internally appends its own information, which seems
* to include the AWS SDK version, OS and JVM version.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*/
private static void initUserAgent(Configuration conf,
ClientConfiguration awsConf) {
String userAgent = "Hadoop " + VersionInfo.getVersion();
String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
if (!userAgentPrefix.isEmpty()) {
userAgent = userAgentPrefix + ", " + userAgent;
}
LOG.debug("Using User-Agent: {}", userAgent);
awsConf.setUserAgentPrefix(userAgent);
}
/**
* Creates an {@link AmazonS3Client} from the established configuration.
*
* @param s3 S3 client to finish configuring
* @param conf Hadoop configuration
* @param credentials AWS credentials
* @param awsConf AWS SDK configuration
* @return S3 client
* @throws IllegalArgumentException if misconfigured
*/
private static AmazonS3 createAmazonS3Client(AmazonS3 s3, Configuration conf,
AWSCredentialsProvider credentials, ClientConfiguration awsConf)
throws IllegalArgumentException {
String endPoint = conf.getTrimmed(ENDPOINT, "");
if (!endPoint.isEmpty()) {
try {
s3.setEndpoint(endPoint);
} catch (IllegalArgumentException e) {
String msg = "Incorrect endpoint: " + e.getMessage();
LOG.error(msg);
throw new IllegalArgumentException(msg, e);
}
}
enablePathStyleAccessIfRequired(s3, conf);
return s3;
}
/**
* Enables path-style access to S3 buckets if configured. By default, the
* behavior is to use virtual hosted-style access with URIs of the form
* http://bucketname.s3.amazonaws.com. Enabling path-style access and a
* region-specific endpoint switches the behavior to use URIs of the form
* http://s3-eu-west-1.amazonaws.com/bucketname.
*
* @param s3 S3 client
* @param conf Hadoop configuration
*/
private static void enablePathStyleAccessIfRequired(AmazonS3 s3,
Configuration conf) {
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
if (pathStyleAccess) {
LOG.debug("Enabling path style access!");
s3.setS3ClientOptions(S3ClientOptions.builder()
.setPathStyleAccess(true)
.build());
}
}
}
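
For reference, a minimal sketch of the endpoint and path-style settings read by this factory; the endpoint value is a placeholder:

// Sketch: point S3A at a region-specific endpoint with path-style URLs,
// e.g. http://s3-eu-west-1.amazonaws.com/bucketname as described above.
import org.apache.hadoop.conf.Configuration;

public class EndpointConfigExample {
  static Configuration withCustomEndpoint() {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.endpoint", "s3-eu-west-1.amazonaws.com");
    conf.setBoolean("fs.s3a.path.style.access", true);
    return conf;
  }
}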

View File

@ -0,0 +1,434 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.fs.s3a.Constants.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
/**
* A wrapper around {@link com.amazonaws.services.s3.AmazonS3} that injects
* inconsistency and/or errors. Used for testing S3Guard.
* Currently only delays listing visibility, not affecting GET.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentAmazonS3Client extends AmazonS3Client {
/**
* Keys containing this substring will be subject to delayed visibility.
*/
public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME";
/**
* How long (in milliseconds) affected keys will be delayed from appearing in listings.
* This should probably be a config value.
*/
public static final long DEFAULT_DELAY_KEY_MSEC = 5 * 1000;
public static final float DEFAULT_DELAY_KEY_PROBABILITY = 1.0f;
/** Special config value since we can't store empty strings in XML. */
public static final String MATCH_ALL_KEYS = "*";
private static final Logger LOG =
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
/** Empty string matches all keys. */
private String delayKeySubstring;
/** Probability to delay visibility of a matching key. */
private float delayKeyProbability;
/** Time in milliseconds to delay visibility of newly modified object. */
private long delayKeyMsec;
/**
* Composite of data we need to track about recently deleted objects:
* when it was deleted (same as with recently put objects) and the object
* summary (since we should keep returning it for some time after its
* deletion).
*/
private static class Delete {
private Long time;
private S3ObjectSummary summary;
Delete(Long time, S3ObjectSummary summary) {
this.time = time;
this.summary = summary;
}
public Long time() {
return time;
}
public S3ObjectSummary summary() {
return summary;
}
}
/** Map of key to delay -> time it was deleted + object summary (object
* summary is null for prefixes). */
private Map<String, Delete> delayedDeletes = new HashMap<>();
/** Map of key to delay -> time it was created. */
private Map<String, Long> delayedPutKeys = new HashMap<>();
public InconsistentAmazonS3Client(AWSCredentialsProvider credentials,
ClientConfiguration clientConfiguration, Configuration conf) {
super(credentials, clientConfiguration);
setupConfig(conf);
}
protected void setupConfig(Configuration conf) {
delayKeySubstring = conf.get(FAIL_INJECT_INCONSISTENCY_KEY,
DEFAULT_DELAY_KEY_SUBSTRING);
// "" is a substring of all strings, use it to match all keys.
if (delayKeySubstring.equals(MATCH_ALL_KEYS)) {
delayKeySubstring = "";
}
delayKeyProbability = conf.getFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY,
DEFAULT_DELAY_KEY_PROBABILITY);
delayKeyMsec = conf.getLong(FAIL_INJECT_INCONSISTENCY_MSEC,
DEFAULT_DELAY_KEY_MSEC);
LOG.info("Enabled with {} msec delay, substring {}, probability {}",
delayKeyMsec, delayKeySubstring, delayKeyProbability);
}
/**
* Clear all outstanding inconsistent keys. After calling this function,
* listings should behave normally (no failure injection), until additional
* keys are matched for delay, e.g. via putObject(), deleteObject().
*/
public void clearInconsistency() {
LOG.info("clearing all delayed puts / deletes");
delayedDeletes.clear();
delayedPutKeys.clear();
}
/**
* Convenience function for test code to cast from supertype.
* @param c supertype to cast from
* @return subtype, not null
* @throws Exception on error
*/
public static InconsistentAmazonS3Client castFrom(AmazonS3 c) throws
Exception {
InconsistentAmazonS3Client ic = null;
if (c instanceof InconsistentAmazonS3Client) {
ic = (InconsistentAmazonS3Client) c;
}
Preconditions.checkNotNull(ic, "Not an instance of " +
"InconsistentAmazonS3Client");
return ic;
}
@Override
public DeleteObjectsResult deleteObjects(DeleteObjectsRequest
deleteObjectsRequest)
throws AmazonClientException, AmazonServiceException {
for (DeleteObjectsRequest.KeyVersion keyVersion :
deleteObjectsRequest.getKeys()) {
registerDeleteObject(keyVersion.getKey(), deleteObjectsRequest
.getBucketName());
}
return super.deleteObjects(deleteObjectsRequest);
}
@Override
public void deleteObject(DeleteObjectRequest deleteObjectRequest)
throws AmazonClientException, AmazonServiceException {
String key = deleteObjectRequest.getKey();
LOG.debug("key {}", key);
registerDeleteObject(key, deleteObjectRequest.getBucketName());
super.deleteObject(deleteObjectRequest);
}
/* We should only need to override this version of putObject() */
@Override
public PutObjectResult putObject(PutObjectRequest putObjectRequest)
throws AmazonClientException, AmazonServiceException {
LOG.debug("key {}", putObjectRequest.getKey());
registerPutObject(putObjectRequest);
return super.putObject(putObjectRequest);
}
/* We should only need to override this version of listObjects() */
@Override
public ObjectListing listObjects(ListObjectsRequest listObjectsRequest)
throws AmazonClientException, AmazonServiceException {
LOG.debug("prefix {}", listObjectsRequest.getPrefix());
ObjectListing listing = super.listObjects(listObjectsRequest);
listing = filterListObjects(listObjectsRequest, listing);
listing = restoreListObjects(listObjectsRequest, listing);
return listing;
}
private void addSummaryIfNotPresent(List<S3ObjectSummary> list,
S3ObjectSummary item) {
// S3ObjectSummary does not override equals(), so compare entries by key.
String key = item.getKey();
for (S3ObjectSummary member : list) {
if (member.getKey().equals(key)) {
return;
}
}
list.add(item);
}
/**
* Add prefix of child to given list. The added prefix will be equal to
* ancestor plus one directory past ancestor. e.g.:
* if ancestor is "/a/b/c" and child is "/a/b/c/d/e/file" then "/a/b/c/d" is
* added to the list.
* @param prefixes list to add to
* @param ancestor path we are listing in
* @param child full path to get prefix from
*/
private void addPrefixIfNotPresent(List<String> prefixes, String ancestor,
String child) {
Path prefixCandidate = new Path(child).getParent();
Path ancestorPath = new Path(ancestor);
Preconditions.checkArgument(child.startsWith(ancestor), "%s does not " +
"start with %s", child, ancestor);
while (!prefixCandidate.isRoot()) {
Path nextParent = prefixCandidate.getParent();
if (nextParent.equals(ancestorPath)) {
String prefix = prefixCandidate.toString();
if (!prefixes.contains(prefix)) {
prefixes.add(prefix);
}
return;
}
prefixCandidate = nextParent;
}
}
/**
* Checks whether the parent key is an ancestor of the child key.
* @param parent key that may be the parent.
* @param child key that may be the child.
* @param recursive if false, only return true for direct children. If
* true, any descendant will count.
* @return true if parent is an ancestor of child
*/
private boolean isDescendant(String parent, String child, boolean recursive) {
if (recursive) {
if (!parent.endsWith("/")) {
parent = parent + "/";
}
return child.startsWith(parent);
} else {
Path actualParentPath = new Path(child).getParent();
Path expectedParentPath = new Path(parent);
return actualParentPath.equals(expectedParentPath);
}
}
/**
* Simulate eventual consistency of delete for this list operation: Any
* recently-deleted keys will be added.
* @param request List request
* @param rawListing listing returned from underlying S3
* @return listing with recently-deleted items restored
*/
private ObjectListing restoreListObjects(ListObjectsRequest request,
ObjectListing rawListing) {
List<S3ObjectSummary> outputList = rawListing.getObjectSummaries();
List<String> outputPrefixes = rawListing.getCommonPrefixes();
// recursive list has no delimiter, returns everything that matches a
// prefix.
boolean recursiveObjectList = !("/".equals(request.getDelimiter()));
// Go through all deleted keys
for (String key : new HashSet<>(delayedDeletes.keySet())) {
Delete delete = delayedDeletes.get(key);
if (isKeyDelayed(delete.time(), key)) {
if (isDescendant(request.getPrefix(), key, recursiveObjectList)) {
if (delete.summary() != null) {
addSummaryIfNotPresent(outputList, delete.summary());
}
}
// Non-recursive list has delimiter: will return rolled-up prefixes for
// all keys that are not direct children
if (!recursiveObjectList) {
if (isDescendant(request.getPrefix(), key, true)) {
addPrefixIfNotPresent(outputPrefixes, request.getPrefix(), key);
}
}
} else {
// Clean up any expired entries
delayedDeletes.remove(key);
}
}
return new CustomObjectListing(rawListing, outputList, outputPrefixes);
}
private ObjectListing filterListObjects(ListObjectsRequest request,
ObjectListing rawListing) {
// Filter object listing
List<S3ObjectSummary> outputList = new ArrayList<>();
for (S3ObjectSummary s : rawListing.getObjectSummaries()) {
String key = s.getKey();
if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
outputList.add(s);
}
}
// Filter prefixes (directories)
List<String> outputPrefixes = new ArrayList<>();
for (String key : rawListing.getCommonPrefixes()) {
if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
outputPrefixes.add(key);
}
}
return new CustomObjectListing(rawListing, outputList, outputPrefixes);
}
private boolean isKeyDelayed(Long enqueueTime, String key) {
if (enqueueTime == null) {
LOG.debug("no delay for key {}", key);
return false;
}
long currentTime = System.currentTimeMillis();
long deadline = enqueueTime + delayKeyMsec;
if (currentTime >= deadline) {
delayedDeletes.remove(key);
LOG.debug("no longer delaying {}", key);
return false;
} else {
LOG.info("delaying {}", key);
return true;
}
}
private void registerDeleteObject(String key, String bucket) {
if (shouldDelay(key)) {
// Record summary so we can add it back for some time post-deletion
S3ObjectSummary summary = null;
ObjectListing list = listObjects(bucket, key);
for (S3ObjectSummary result : list.getObjectSummaries()) {
if (result.getKey().equals(key)) {
summary = result;
break;
}
}
delayedDeletes.put(key, new Delete(System.currentTimeMillis(), summary));
}
}
private void registerPutObject(PutObjectRequest req) {
String key = req.getKey();
if (shouldDelay(key)) {
enqueueDelayedPut(key);
}
}
/**
* Should we delay listing visibility for this key?
* @param key key which is being put
* @return true if we should delay
*/
private boolean shouldDelay(String key) {
boolean delay = key.contains(delayKeySubstring);
delay = delay && trueWithProbability(delayKeyProbability);
LOG.debug("{} -> {}", key, delay);
return delay;
}
private boolean trueWithProbability(float p) {
return Math.random() < p;
}
/**
* Record this key as something that should not become visible in
* listObject replies for a while, to simulate eventual list consistency.
* @param key key to delay visibility of
*/
private void enqueueDelayedPut(String key) {
LOG.debug("delaying put of {}", key);
delayedPutKeys.put(key, System.currentTimeMillis());
}
/** Since ObjectListing is immutable, we just override it with wrapper. */
private static class CustomObjectListing extends ObjectListing {
private final List<S3ObjectSummary> customListing;
private final List<String> customPrefixes;
CustomObjectListing(ObjectListing rawListing,
List<S3ObjectSummary> customListing,
List<String> customPrefixes) {
super();
this.customListing = customListing;
this.customPrefixes = customPrefixes;
this.setBucketName(rawListing.getBucketName());
this.setCommonPrefixes(rawListing.getCommonPrefixes());
this.setDelimiter(rawListing.getDelimiter());
this.setEncodingType(rawListing.getEncodingType());
this.setMarker(rawListing.getMarker());
this.setMaxKeys(rawListing.getMaxKeys());
this.setNextMarker(rawListing.getNextMarker());
this.setPrefix(rawListing.getPrefix());
this.setTruncated(rawListing.isTruncated());
}
@Override
public List<S3ObjectSummary> getObjectSummaries() {
return customListing;
}
@Override
public List<String> getCommonPrefixes() {
return customPrefixes;
}
}
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* S3 Client factory used for testing with eventual consistency fault injection.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
@Override
protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
ClientConfiguration awsConf) {
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
return new InconsistentAmazonS3Client(credentials, awsConf, getConf());
}
}
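
For illustration, a sketch of wiring this factory into a test configuration and tuning the injection through the fail-injection keys added to Constants; the client-factory key named here is an assumption, as it is not shown in this excerpt:

// Sketch: route S3A through the fault-injecting client for a test run.
import org.apache.hadoop.conf.Configuration;

public class FaultInjectionSetupExample {
  static Configuration withInjectedInconsistency() {
    Configuration conf = new Configuration();
    // Assumed S3A client factory key; not part of this excerpt.
    conf.set("fs.s3a.s3.client.factory.impl",
        "org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory");
    // Delay listing visibility of keys containing this substring...
    conf.set("fs.s3a.failinject.inconsistency.key.substring",
        "DELAY_LISTING_ME");
    // ...for five seconds, with probability 1.0.
    conf.setLong("fs.s3a.failinject.inconsistency.msec", 5000);
    conf.setFloat("fs.s3a.failinject.inconsistency.probability", 1.0f);
    return conf;
  }
}

Between test cases, InconsistentAmazonS3Client.castFrom(client).clearInconsistency() can be used to reset any pending delayed keys.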

View File

@ -22,18 +22,25 @@ import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.Set;
import static org.apache.hadoop.fs.s3a.Constants.S3N_FOLDER_SUFFIX;
import static org.apache.hadoop.fs.s3a.S3AUtils.createFileStatus;
@ -54,8 +61,25 @@ public class Listing {
}
/**
* Create a FileStatus iterator against a path, with a given
* list object request.
* Create a FileStatus iterator against a provided list of file status, with
* a given status filter.
*
* @param fileStatuses the provided list of file status. NO remote calls.
* @param filter file path filter on which paths to accept
* @param acceptor the file status acceptor
* @return the file status iterator
*/
ProvidedFileStatusIterator createProvidedFileStatusIterator(
FileStatus[] fileStatuses,
PathFilter filter,
FileStatusAcceptor acceptor) {
return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
}
/**
* Create a FileStatus iterator against a path, with a given list object
* request.
*
* @param listPath path of the listing
* @param request initial request to make
* @param filter the filter on which paths to accept
@ -69,10 +93,34 @@ public class Listing {
ListObjectsRequest request,
PathFilter filter,
Listing.FileStatusAcceptor acceptor) throws IOException {
return createFileStatusListingIterator(listPath, request, filter, acceptor,
null);
}
/**
* Create a FileStatus iterator against a path, with a given
* list object request.
* @param listPath path of the listing
* @param request initial request to make
* @param filter the filter on which paths to accept
* @param acceptor the class/predicate to decide which entries to accept
* in the listing based on the full file status.
* @param providedStatus the provided list of file status, which may contain
* items that are not listed from source.
* @return the iterator
* @throws IOException IO Problems
*/
FileStatusListingIterator createFileStatusListingIterator(
Path listPath,
ListObjectsRequest request,
PathFilter filter,
Listing.FileStatusAcceptor acceptor,
RemoteIterator<FileStatus> providedStatus) throws IOException {
return new FileStatusListingIterator(
new ObjectListingIterator(listPath, request),
filter,
acceptor);
acceptor,
providedStatus);
}
/**
@ -80,11 +128,26 @@ public class Listing {
* @param statusIterator an iterator over the remote status entries
* @return a new remote iterator
*/
@VisibleForTesting
LocatedFileStatusIterator createLocatedFileStatusIterator(
RemoteIterator<FileStatus> statusIterator) {
return new LocatedFileStatusIterator(statusIterator);
}
/**
* Create a located status iterator that wraps another to filter out a set
* of recently deleted items.
* @param iterator an iterator over the remote located status entries.
* @param tombstones set of paths that are recently deleted and should be
* filtered.
* @return a new remote iterator.
*/
@VisibleForTesting
TombstoneReconcilingIterator createTombstoneReconcilingIterator(
RemoteIterator<LocatedFileStatus> iterator, Set<Path> tombstones) {
return new TombstoneReconcilingIterator(iterator, tombstones);
}
/**
* Interface to implement by the logic deciding whether to accept a summary
* entry or path as a valid file or directory.
@ -108,6 +171,13 @@ public class Listing {
* should be generated.)
*/
boolean accept(Path keyPath, String commonPrefix);
/**
* Predicate to decide whether or not to accept a file status.
* @param status file status containing file path information
* @return true if the status is accepted else false
*/
boolean accept(FileStatus status);
}
/**
@ -115,9 +185,9 @@ public class Listing {
* value.
*
* If the status value is null, the iterator declares that it has no data.
* This iterator is used to handle {@link listStatus()} calls where the path
* handed in refers to a file, not a directory: this is the iterator
* returned.
* This iterator is used to handle {@link S3AFileSystem#listStatus} calls
* where the path handed in refers to a file, not a directory: this is the
* iterator returned.
*/
static final class SingleStatusRemoteIterator
implements RemoteIterator<LocatedFileStatus> {
@ -168,6 +238,47 @@ public class Listing {
}
}
/**
* This wraps up a provided non-null list of file status as a remote iterator.
*
* It first filters the provided list; later calls to {@link #next} return
* entries from the filtered list. This suffers from scalability issues if the
* provided list is too large.
*
* There is no remote data to fetch.
*/
static class ProvidedFileStatusIterator
implements RemoteIterator<FileStatus> {
private final ArrayList<FileStatus> filteredStatusList;
private int index = 0;
ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter,
FileStatusAcceptor acceptor) {
Preconditions.checkArgument(fileStatuses != null, "Null status list!");
filteredStatusList = new ArrayList<>(fileStatuses.length);
for (FileStatus status : fileStatuses) {
if (filter.accept(status.getPath()) && acceptor.accept(status)) {
filteredStatusList.add(status);
}
}
filteredStatusList.trimToSize();
}
@Override
public boolean hasNext() throws IOException {
return index < filteredStatusList.size();
}
@Override
public FileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException();
}
return filteredStatusList.get(index++);
}
}
/**
* Wraps up object listing into a remote iterator which will ask for more
* listing data if needed.
@ -179,7 +290,7 @@ public class Listing {
* iterator can declare that there is more data available.
*
* The need to filter the results precludes the iterator from simply
* declaring that if the {@link S3AFileSystem.ObjectListingIterator#hasNext()}
* declaring that if the {@link ObjectListingIterator#hasNext()}
* is true then there are more results. Instead the next batch of results must
* be retrieved and filtered.
*
@ -208,20 +319,33 @@ public class Listing {
/** Iterator over the current set of results. */
private ListIterator<FileStatus> statusBatchIterator;
private final Set<FileStatus> providedStatus;
private Iterator<FileStatus> providedStatusIterator;
/**
* Create an iterator over file status entries.
* @param source the listing iterator from a listObjects call.
* @param filter the filter on which paths to accept
* @param acceptor the class/predicate to decide which entries to accept
* in the listing based on the full file status.
* @param providedStatus the provided list of file status, which may contain
* items that are not listed from source.
* @throws IOException IO Problems
*/
FileStatusListingIterator(ObjectListingIterator source,
PathFilter filter,
FileStatusAcceptor acceptor) throws IOException {
FileStatusAcceptor acceptor,
RemoteIterator<FileStatus> providedStatus) throws IOException {
this.source = source;
this.filter = filter;
this.acceptor = acceptor;
this.providedStatus = new HashSet<>();
for (; providedStatus != null && providedStatus.hasNext();) {
final FileStatus status = providedStatus.next();
if (filter.accept(status.getPath()) && acceptor.accept(status)) {
this.providedStatus.add(status);
}
}
// build the first set of results. This will not trigger any
// remote IO, assuming the source iterator is in its initial
// iteration
@ -233,26 +357,53 @@ public class Listing {
* If there is data in the local filtered list, return true.
* Else: request more data until that condition is met, or there
* is no more remote listing data.
* Lastly, return true if the {@code providedStatusIterator}
* still has remaining items.
* @return true if a call to {@link #next()} will succeed.
* @throws IOException
*/
@Override
public boolean hasNext() throws IOException {
return statusBatchIterator.hasNext() || requestNextBatch();
return sourceHasNext() || providedStatusIterator.hasNext();
}
private boolean sourceHasNext() throws IOException {
if (statusBatchIterator.hasNext() || requestNextBatch()) {
return true;
} else {
// switch over to the file statuses that are only in the provided list
if (providedStatusIterator == null) {
LOG.debug("Start iterating the provided status.");
providedStatusIterator = providedStatus.iterator();
}
return false;
}
}
@Override
public FileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException();
final FileStatus status;
if (sourceHasNext()) {
status = statusBatchIterator.next();
// Remove this status from the provided list if S3 also listed it, so
// that duplicate items are not returned.
LOG.debug("Removing the status from provided file status {}", status);
providedStatus.remove(status);
} else {
if (providedStatusIterator.hasNext()) {
status = providedStatusIterator.next();
LOG.debug("Returning provided file status {}", status);
} else {
throw new NoSuchElementException();
}
}
return statusBatchIterator.next();
return status;
}
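A minimal sketch of that merge order, using plain java.util collections in place of the S3 listing batches and the provided status set; all names here are hypothetical and only illustrate the ordering and de-duplication.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.NoSuchElementException;
import java.util.Set;

// Sketch: serve remote entries first, removing duplicates from the
// provided set; once the remote side is exhausted, drain whatever the
// remote listing never returned.
class MergingIterator {
  private final Deque<String> remote;        // stands in for S3 listing batches
  private final Set<String> provided;        // stands in for metadata store entries
  private Iterator<String> providedIterator;

  MergingIterator(Deque<String> remote, Set<String> provided) {
    this.remote = remote;
    this.provided = provided;
  }

  boolean hasNext() {
    if (!remote.isEmpty()) {
      return true;
    }
    if (providedIterator == null) {
      providedIterator = provided.iterator(); // switch to provided-only entries
    }
    return providedIterator.hasNext();
  }

  String next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    if (!remote.isEmpty()) {
      String entry = remote.removeFirst();
      provided.remove(entry);                // avoid returning duplicates later
      return entry;
    }
    return providedIterator.next();
  }

  public static void main(String[] args) {
    MergingIterator it = new MergingIterator(
        new ArrayDeque<>(Arrays.asList("a", "b")),
        new LinkedHashSet<>(Arrays.asList("b", "c")));
    while (it.hasNext()) {
      System.out.println(it.next()); // prints a, b, c; b appears only once
    }
  }
}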
/**
* Try to retrieve another batch.
* Note that for the initial batch,
* {@link S3AFileSystem.ObjectListingIterator} does not generate a request;
* {@link ObjectListingIterator} does not generate a request;
* it simply returns the initial set.
*
* @return true if a new batch was created.
@ -312,7 +463,7 @@ public class Listing {
for (String prefix : objects.getCommonPrefixes()) {
Path keyPath = owner.keyToQualifiedPath(prefix);
if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) {
FileStatus status = new S3AFileStatus(false, keyPath,
FileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
owner.getUsername());
LOG.debug("Adding directory: {}", status);
added++;
@ -352,7 +503,7 @@ public class Listing {
* instance.
*
* 2. Second and later invocations will continue the ongoing listing,
* calling {@link #continueListObjects(ObjectListing)} to request the next
* calling {@link S3AFileSystem#continueListObjects} to request the next
* batch of results.
*
* 3. The {@link #hasNext()} predicate returns true for the initial call,
@ -504,6 +655,11 @@ public class Listing {
public boolean accept(Path keyPath, String prefix) {
return false;
}
@Override
public boolean accept(FileStatus status) {
return (status != null) && status.isFile();
}
}
/**
@ -533,6 +689,80 @@ public class Listing {
}
}
/**
* Wraps another iterator and filters out files that appear in the provided
* set of tombstones. Will read ahead in the iterator when necessary to
* ensure that emptiness is detected early enough if only deleted objects
* remain in the source iterator.
*/
static class TombstoneReconcilingIterator implements
RemoteIterator<LocatedFileStatus> {
private LocatedFileStatus next = null;
private final RemoteIterator<LocatedFileStatus> iterator;
private final Set<Path> tombstones;
/**
* @param iterator Source iterator to filter
* @param tombstones set of tombstone markers to filter out of results
*/
TombstoneReconcilingIterator(RemoteIterator<LocatedFileStatus>
iterator, Set<Path> tombstones) {
this.iterator = iterator;
if (tombstones != null) {
this.tombstones = tombstones;
} else {
this.tombstones = Collections.EMPTY_SET;
}
}
private boolean fetch() throws IOException {
while (next == null && iterator.hasNext()) {
LocatedFileStatus candidate = iterator.next();
if (!tombstones.contains(candidate.getPath())) {
next = candidate;
return true;
}
}
return false;
}
public boolean hasNext() throws IOException {
if (next != null) {
return true;
}
return fetch();
}
public LocatedFileStatus next() throws IOException {
if (hasNext()) {
LocatedFileStatus result = next;
next = null;
fetch();
return result;
}
throw new NoSuchElementException();
}
}
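The read-ahead and skip behaviour can be sketched with ordinary strings standing in for paths; the class below is an illustrative reduction, not the patch's iterator.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

// Sketch: read ahead in the wrapped iterator, skipping any entry whose
// path appears in the tombstone set of recently deleted paths.
class TombstoneFilteringIterator {
  private final Iterator<String> source;
  private final Set<String> tombstones;
  private String next;

  TombstoneFilteringIterator(Iterator<String> source, Set<String> tombstones) {
    this.source = source;
    this.tombstones = tombstones;
  }

  private boolean fetch() {
    while (next == null && source.hasNext()) {
      String candidate = source.next();
      if (!tombstones.contains(candidate)) {
        next = candidate;
        return true;
      }
    }
    return false;
  }

  boolean hasNext() {
    return next != null || fetch();
  }

  String next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    String result = next;
    next = null;
    return result;
  }

  public static void main(String[] args) {
    TombstoneFilteringIterator it = new TombstoneFilteringIterator(
        Arrays.asList("a", "deleted", "b").iterator(),
        new HashSet<>(Arrays.asList("deleted")));
    while (it.hasNext()) {
      System.out.println(it.next()); // prints a then b
    }
  }
}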
/**
* Accept all entries except those which map to S3N pseudo directory markers.
*/
static class AcceptAllButS3nDirs implements FileStatusAcceptor {
public boolean accept(Path keyPath, S3ObjectSummary summary) {
return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX);
}
public boolean accept(Path keyPath, String prefix) {
return !keyPath.toString().endsWith(S3N_FOLDER_SUFFIX);
}
public boolean accept(FileStatus status) {
return !status.getPath().toString().endsWith(S3N_FOLDER_SUFFIX);
}
}
/**
* Accept all entries except the base path and those which map to S3N
* pseudo directory markers.
@ -575,6 +805,11 @@ public class Listing {
public boolean accept(Path keyPath, String prefix) {
return !keyPath.equals(qualifiedPath);
}
@Override
public boolean accept(FileStatus status) {
return (status != null) && !status.getPath().equals(qualifiedPath);
}
}
/**

View File

@ -79,6 +79,9 @@ class S3ABlockOutputStream extends OutputStream {
/** Size of all blocks. */
private final int blockSize;
/** Total bytes for uploads submitted so far. */
private long bytesSubmitted;
/** Callback for progress. */
private final ProgressListener progressListener;
private final ListeningExecutorService executorService;
@ -302,6 +305,7 @@ class S3ABlockOutputStream extends OutputStream {
}
try {
multiPartUpload.uploadBlockAsync(getActiveBlock());
bytesSubmitted += getActiveBlock().dataSize();
} finally {
// set the block to null, so the next write will create a new block.
clearActiveBlock();
@ -330,13 +334,14 @@ class S3ABlockOutputStream extends OutputStream {
this,
blockCount,
hasBlock ? block : "(none)");
long bytes = 0;
try {
if (multiPartUpload == null) {
if (hasBlock) {
// no uploads of data have taken place, put the single block up.
// This must happen even if there is no data, so that 0 byte files
// are created.
putObject();
bytes = putObject();
}
} else {
// there has already been at least one block scheduled for upload;
@ -350,6 +355,7 @@ class S3ABlockOutputStream extends OutputStream {
multiPartUpload.waitForAllPartUploads();
// then complete the operation
multiPartUpload.complete(partETags);
bytes = bytesSubmitted;
}
LOG.debug("Upload complete for {}", writeOperationHelper);
} catch (IOException ioe) {
@ -362,7 +368,7 @@ class S3ABlockOutputStream extends OutputStream {
clearActiveBlock();
}
// All end of write operations, including deleting fake parent directories
writeOperationHelper.writeSuccessful();
writeOperationHelper.writeSuccessful(bytes);
}
/**
@ -370,8 +376,11 @@ class S3ABlockOutputStream extends OutputStream {
* is empty a 0-byte PUT will be invoked, as it is needed to create an
* entry at the far end.
* @throws IOException any problem.
* @return number of bytes uploaded. If thread was interrupted while
* waiting for upload to complete, returns zero with interrupted flag set
* on this thread.
*/
private void putObject() throws IOException {
private int putObject() throws IOException {
LOG.debug("Executing regular upload for {}", writeOperationHelper);
final S3ADataBlocks.DataBlock block = getActiveBlock();
@ -405,9 +414,11 @@ class S3ABlockOutputStream extends OutputStream {
//wait for completion
try {
putObjectResult.get();
return size;
} catch (InterruptedException ie) {
LOG.warn("Interrupted object upload", ie);
Thread.currentThread().interrupt();
return 0;
} catch (ExecutionException ee) {
throw extractException("regular upload", key, ee);
}

View File

@ -31,7 +31,7 @@ import org.apache.hadoop.fs.Path;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class S3AFileStatus extends FileStatus {
private boolean isEmptyDirectory;
private Tristate isEmptyDirectory;
/**
* Create a directory status.
@ -42,6 +42,18 @@ public class S3AFileStatus extends FileStatus {
public S3AFileStatus(boolean isemptydir,
Path path,
String owner) {
this(Tristate.fromBool(isemptydir), path, owner);
}
/**
* Create a directory status.
* @param isemptydir is this an empty directory?
* @param path the path
* @param owner the owner
*/
public S3AFileStatus(Tristate isemptydir,
Path path,
String owner) {
super(0, true, 1, 0, 0, path);
isEmptyDirectory = isemptydir;
setOwner(owner);
@ -59,12 +71,37 @@ public class S3AFileStatus extends FileStatus {
public S3AFileStatus(long length, long modification_time, Path path,
long blockSize, String owner) {
super(length, false, 1, blockSize, modification_time, path);
isEmptyDirectory = false;
isEmptyDirectory = Tristate.FALSE;
setOwner(owner);
setGroup(owner);
}
public boolean isEmptyDirectory() {
/**
* Convenience constructor for creating from a vanilla FileStatus plus
* an isEmptyDirectory flag.
* @param source FileStatus to convert to S3AFileStatus
* @param isEmptyDirectory TRUE/FALSE if known to be / not be an empty
* directory, UNKNOWN if that information was not computed.
* @return a new S3AFileStatus
*/
public static S3AFileStatus fromFileStatus(FileStatus source,
Tristate isEmptyDirectory) {
if (source.isDirectory()) {
return new S3AFileStatus(isEmptyDirectory, source.getPath(),
source.getOwner());
} else {
return new S3AFileStatus(source.getLen(), source.getModificationTime(),
source.getPath(), source.getBlockSize(), source.getOwner());
}
}
/**
* @return FALSE if the status is not a directory, or it is a directory known
* not to be empty. TRUE if it is an empty directory. UNKNOWN if it is a
* directory, but we have not computed whether or not it is empty.
*/
public Tristate isEmptyDirectory() {
return isEmptyDirectory;
}
@ -110,7 +147,7 @@ public class S3AFileStatus extends FileStatus {
@Override
public String toString() {
return super.toString() +
String.format(" isEmptyDirectory=%s", isEmptyDirectory());
String.format(" isEmptyDirectory=%s", isEmptyDirectory().name());
}
}
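A rough sketch of the three-valued flag this class now carries; the enum below is a stand-in for the real Tristate type and only shows how UNKNOWN defers the emptiness probe.

// Minimal sketch of a three-valued emptiness flag; the actual Tristate
// enum in the patch may differ in detail.
enum TristateSketch {
  TRUE, FALSE, UNKNOWN;

  static TristateSketch fromBool(boolean v) {
    return v ? TRUE : FALSE;
  }
}

class TristateDemo {
  // Decide whether an expensive "is this directory empty?" probe is needed.
  static boolean needsProbe(TristateSketch isEmptyDir) {
    return isEmptyDir == TristateSketch.UNKNOWN;
  }

  public static void main(String[] args) {
    System.out.println(needsProbe(TristateSketch.fromBool(true))); // false
    System.out.println(needsProbe(TristateSketch.UNKNOWN));        // true
  }
}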

View File

@ -25,12 +25,16 @@ import java.io.InputStream;
import java.io.InterruptedIOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.Set;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@ -92,6 +96,11 @@ import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStoreListFilesIterator;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
@ -149,6 +158,8 @@ public class S3AFileSystem extends FileSystem {
private long readAhead;
private S3AInputPolicy inputPolicy;
private final AtomicBoolean closed = new AtomicBoolean(false);
private MetadataStore metadataStore;
private boolean allowAuthoritative;
// The maximum number of entries that can be deleted in any call to s3
private static final int MAX_ENTRIES_TO_DELETE = 1000;
@ -277,6 +288,10 @@ public class S3AFileSystem extends FileSystem {
} else {
LOG.debug("Using S3AOutputStream");
}
metadataStore = S3Guard.getMetadataStore(this);
allowAuthoritative = conf.getBoolean(METADATASTORE_AUTHORITATIVE,
DEFAULT_METADATASTORE_AUTHORITATIVE);
} catch (AmazonClientException e) {
throw translateException("initializing ", new Path(name), e);
}
@ -388,11 +403,34 @@ public class S3AFileSystem extends FileSystem {
* Returns the S3 client used by this filesystem.
* @return AmazonS3Client
*/
@VisibleForTesting
AmazonS3 getAmazonS3Client() {
return s3;
}
/**
* Get the region of a bucket.
* @return the region in which a bucket is located
* @throws IOException on any failure.
*/
public String getBucketLocation() throws IOException {
return getBucketLocation(bucket);
}
/**
* Get the region of a bucket.
* @param bucketName the name of the bucket
* @return the region in which a bucket is located
* @throws IOException on any failure.
*/
public String getBucketLocation(String bucketName) throws IOException {
try {
return s3.getBucketLocation(bucketName);
} catch (AmazonClientException e) {
throw translateException("getBucketLocation()",
bucketName, e);
}
}
/**
* Returns the readahead range value used by this filesystem.
* @return the readahead range
@ -457,7 +495,7 @@ public class S3AFileSystem extends FileSystem {
* @return a key excluding the leading "/", or, if it is the root path, ""
*/
@VisibleForTesting
String pathToKey(Path path) {
public String pathToKey(Path path) {
if (!path.isAbsolute()) {
path = new Path(workingDir, path);
}
@ -508,7 +546,7 @@ public class S3AFileSystem extends FileSystem {
* @param path path to qualify
* @return a qualified path.
*/
Path qualify(Path path) {
public Path qualify(Path path) {
return path.makeQualified(uri, workingDir);
}
@ -578,7 +616,7 @@ public class S3AFileSystem extends FileSystem {
boolean overwrite, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
String key = pathToKey(f);
S3AFileStatus status = null;
FileStatus status = null;
try {
// get the status or throw an FNFE
status = getFileStatus(f);
@ -706,8 +744,8 @@ public class S3AFileSystem extends FileSystem {
* the description of the operation.
* This operation throws an exception on any failure which needs to be
* reported and downgraded to a failure. That is: if a rename
* @param src path to be renamed
* @param dst new path after rename
* @param source path to be renamed
* @param dest new path after rename
* @throws RenameFailedException if some criteria for a state changing
* rename was not met. This means work didn't happen; it's not something
* which is reported upstream to the FileSystem APIs, for which the semantics
@ -716,9 +754,12 @@ public class S3AFileSystem extends FileSystem {
* @throws IOException on IO failure.
* @throws AmazonClientException on failures inside the AWS SDK
*/
private boolean innerRename(Path src, Path dst)
private boolean innerRename(Path source, Path dest)
throws RenameFailedException, FileNotFoundException, IOException,
AmazonClientException {
Path src = qualify(source);
Path dst = qualify(dest);
LOG.debug("Rename path {} to {}", src, dst);
incrementStatistic(INVOCATION_RENAME);
@ -734,7 +775,7 @@ public class S3AFileSystem extends FileSystem {
// get the source file status; this raises a FNFE if there is no source
// file.
S3AFileStatus srcStatus = getFileStatus(src);
S3AFileStatus srcStatus = innerGetFileStatus(src, true);
if (srcKey.equals(dstKey)) {
LOG.debug("rename: src and dest refer to the same file or directory: {}",
@ -746,7 +787,7 @@ public class S3AFileSystem extends FileSystem {
S3AFileStatus dstStatus = null;
try {
dstStatus = getFileStatus(dst);
dstStatus = innerGetFileStatus(dst, true);
// if there is no destination entry, an exception is raised.
// hence this code sequence can assume that there is something
// at the end of the path; the only detail being what it is and
@ -756,7 +797,7 @@ public class S3AFileSystem extends FileSystem {
throw new RenameFailedException(src, dst,
"source is a directory and dest is a file")
.withExitCode(srcStatus.isFile());
} else if (!dstStatus.isEmptyDirectory()) {
} else if (dstStatus.isEmptyDirectory() != Tristate.TRUE) {
throw new RenameFailedException(src, dst,
"Destination is a non-empty directory")
.withExitCode(false);
@ -778,7 +819,8 @@ public class S3AFileSystem extends FileSystem {
Path parent = dst.getParent();
if (!pathToKey(parent).isEmpty()) {
try {
S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
S3AFileStatus dstParentStatus = innerGetFileStatus(dst.getParent(),
false);
if (!dstParentStatus.isDirectory()) {
throw new RenameFailedException(src, dst,
"destination parent is not a directory");
@ -790,9 +832,20 @@ public class S3AFileSystem extends FileSystem {
}
}
// If we have a MetadataStore, track deletions/creations.
Collection<Path> srcPaths = null;
List<PathMetadata> dstMetas = null;
if (hasMetadataStore()) {
srcPaths = new HashSet<>(); // srcPaths need fast look up before put
dstMetas = new ArrayList<>();
}
// TODO S3Guard HADOOP-13761: retries when source paths are not visible yet
// TODO S3Guard: performance: mark destination dirs as authoritative
// Ok! Time to start
if (srcStatus.isFile()) {
LOG.debug("rename: renaming file {} to {}", src, dst);
long length = srcStatus.getLen();
if (dstStatus != null && dstStatus.isDirectory()) {
String newDstKey = dstKey;
if (!newDstKey.endsWith("/")) {
@ -801,9 +854,14 @@ public class S3AFileSystem extends FileSystem {
String filename =
srcKey.substring(pathToKey(src.getParent()).length()+1);
newDstKey = newDstKey + filename;
copyFile(srcKey, newDstKey, srcStatus.getLen());
copyFile(srcKey, newDstKey, length);
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src,
keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst),
username);
} else {
copyFile(srcKey, dstKey, srcStatus.getLen());
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst,
length, getDefaultBlockSize(dst), username);
}
innerDelete(srcStatus, false);
} else {
@ -825,42 +883,66 @@ public class S3AFileSystem extends FileSystem {
}
List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
if (dstStatus != null && dstStatus.isEmptyDirectory()) {
if (dstStatus != null && dstStatus.isEmptyDirectory() == Tristate.TRUE) {
// delete unnecessary fake directory.
keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
}
ListObjectsRequest request = new ListObjectsRequest();
request.setBucketName(bucket);
request.setPrefix(srcKey);
request.setMaxKeys(maxKeys);
Path parentPath = keyToPath(srcKey);
RemoteIterator<LocatedFileStatus> iterator = listFilesAndEmptyDirectories(
parentPath, true);
while (iterator.hasNext()) {
LocatedFileStatus status = iterator.next();
long length = status.getLen();
String key = pathToKey(status.getPath());
if (status.isDirectory() && !key.endsWith("/")) {
key += "/";
}
keysToDelete
.add(new DeleteObjectsRequest.KeyVersion(key));
String newDstKey =
dstKey + key.substring(srcKey.length());
copyFile(key, newDstKey, length);
ObjectListing objects = listObjects(request);
while (true) {
for (S3ObjectSummary summary : objects.getObjectSummaries()) {
keysToDelete.add(
new DeleteObjectsRequest.KeyVersion(summary.getKey()));
String newDstKey =
dstKey + summary.getKey().substring(srcKey.length());
copyFile(summary.getKey(), newDstKey, summary.getSize());
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true, false);
if (hasMetadataStore()) {
// with a metadata store, the object entries need to be updated,
// including, potentially, the ancestors
Path childSrc = keyToQualifiedPath(key);
Path childDst = keyToQualifiedPath(newDstKey);
if (objectRepresentsDirectory(key, length)) {
S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, childSrc,
childDst, username);
} else {
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc,
childDst, length, getDefaultBlockSize(childDst), username);
}
// Ancestor directories may not be listed, so we explicitly add them
S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas,
keyToQualifiedPath(srcKey), childSrc, childDst, username);
}
if (objects.isTruncated()) {
objects = continueListObjects(objects);
} else {
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false, false);
}
break;
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
removeKeys(keysToDelete, true, false);
}
}
if (!keysToDelete.isEmpty()) {
removeKeys(keysToDelete, false, false);
}
// We moved all the children, now move the top-level dir
// An empty top-level directory would already have been listed as an
// object summary and moved above.
if (hasMetadataStore()
&& srcPaths != null
&& !srcPaths.contains(src)) {
LOG.debug("To move the non-empty top-level dir src={} and dst={}",
src, dst);
S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, src, dst,
username);
}
}
metadataStore.move(srcPaths, dstMetas);
if (src.getParent() != dst.getParent()) {
deleteUnnecessaryFakeDirectories(dst.getParent());
createFakeDirectoryIfNecessary(src.getParent());
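The bookkeeping pattern used in this rename path, collecting per-entry source paths and destination metadata while copying and then applying them to the store in one batched move, can be sketched as follows with hypothetical types.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch: accumulate rename bookkeeping while copying, then apply it to
// a (hypothetical) metadata store in a single batched move() call.
class RenameBookkeeping {
  static class Move {
    final String src;
    final String dst;
    Move(String src, String dst) {
      this.src = src;
      this.dst = dst;
    }
  }

  interface Store {
    void move(Set<String> deletedPaths, List<Move> createdEntries);
  }

  static void rename(Store store, List<String> children,
      String srcPrefix, String dstPrefix) {
    Set<String> srcPaths = new HashSet<>();   // needs fast lookup before put
    List<Move> dstEntries = new ArrayList<>();
    for (String child : children) {
      String dst = dstPrefix + child.substring(srcPrefix.length());
      // ... copy the object itself here ...
      srcPaths.add(child);
      dstEntries.add(new Move(child, dst));
    }
    store.move(srcPaths, dstEntries);         // one batched metadata update
  }

  public static void main(String[] args) {
    rename((deleted, created) ->
            System.out.println("deleting " + deleted + ", creating "
                + created.size() + " entries"),
        Arrays.asList("src/a", "src/b"), "src/", "dst/");
  }
}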
@ -879,6 +961,31 @@ public class S3AFileSystem extends FileSystem {
return getObjectMetadata(pathToKey(path));
}
/**
* Does this Filesystem have a metadata store?
* @return true iff the FS has been instantiated with a metadata store
*/
public boolean hasMetadataStore() {
return !S3Guard.isNullMetadataStore(metadataStore);
}
/**
* Get the metadata store.
* This will always be non-null, but may be bound to the
* {@code NullMetadataStore}.
* @return the metadata store of this FS instance
*/
@VisibleForTesting
MetadataStore getMetadataStore() {
return metadataStore;
}
/** For testing only. See ITestS3GuardEmptyDirs. */
@VisibleForTesting
void setMetadataStore(MetadataStore ms) {
metadataStore = ms;
}
/**
* Increment a statistic by 1.
* @param statistic The operation to increment
@ -1063,8 +1170,9 @@ public class S3AFileSystem extends FileSystem {
* @param inputStream source data.
* @return the request
*/
private PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata, InputStream inputStream) {
PutObjectRequest newPutObjectRequest(String key,
ObjectMetadata metadata,
InputStream inputStream) {
Preconditions.checkNotNull(inputStream);
PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key,
inputStream, metadata);
@ -1115,7 +1223,7 @@ public class S3AFileSystem extends FileSystem {
* @param putObjectRequest the request
* @return the upload initiated
*/
public Upload putObject(PutObjectRequest putObjectRequest) {
public UploadInfo putObject(PutObjectRequest putObjectRequest) {
long len;
if (putObjectRequest.getFile() != null) {
len = putObjectRequest.getFile().length();
@ -1126,7 +1234,7 @@ public class S3AFileSystem extends FileSystem {
try {
Upload upload = transfers.upload(putObjectRequest);
incrementPutCompletedStatistics(true, len);
return upload;
return new UploadInfo(upload, len);
} catch (AmazonClientException e) {
incrementPutCompletedStatistics(false, len);
throw e;
@ -1142,14 +1250,10 @@ public class S3AFileSystem extends FileSystem {
* @return the upload initiated
* @throws AmazonClientException on problems
*/
public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
throws AmazonClientException {
long len;
if (putObjectRequest.getFile() != null) {
len = putObjectRequest.getFile().length();
} else {
len = putObjectRequest.getMetadata().getContentLength();
}
long len = getPutRequestLength(putObjectRequest);
LOG.debug("PUT {} bytes to {}", len, putObjectRequest.getKey());
incrementPutStartStatistics(len);
try {
PutObjectResult result = s3.putObject(putObjectRequest);
@ -1161,6 +1265,23 @@ public class S3AFileSystem extends FileSystem {
}
}
/**
* Get the length of the PUT, verifying that the length is known.
* @param putObjectRequest a request bound to a file or a stream.
* @return the request length
* @throws IllegalArgumentException if the length is negative
*/
private long getPutRequestLength(PutObjectRequest putObjectRequest) {
long len;
if (putObjectRequest.getFile() != null) {
len = putObjectRequest.getFile().length();
} else {
len = putObjectRequest.getMetadata().getContentLength();
}
Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length");
return len;
}
/**
* Upload part of a multi-partition file.
* Increments the write and put counters.
@ -1288,7 +1409,7 @@ public class S3AFileSystem extends FileSystem {
*/
public boolean delete(Path f, boolean recursive) throws IOException {
try {
return innerDelete(getFileStatus(f), recursive);
return innerDelete(innerGetFileStatus(f, true), recursive);
} catch (FileNotFoundException e) {
LOG.debug("Couldn't delete {} - does not exist", f);
instrumentation.errorIgnored();
@ -1318,6 +1439,9 @@ public class S3AFileSystem extends FileSystem {
if (status.isDirectory()) {
LOG.debug("delete: Path is a directory: {}", f);
Preconditions.checkArgument(
status.isEmptyDirectory() != Tristate.UNKNOWN,
"File status must have directory emptiness computed");
if (!key.endsWith("/")) {
key = key + "/";
@ -1327,13 +1451,15 @@ public class S3AFileSystem extends FileSystem {
return rejectRootDirectoryDelete(status, recursive);
}
if (!recursive && !status.isEmptyDirectory()) {
if (!recursive && status.isEmptyDirectory() == Tristate.FALSE) {
throw new PathIsNotEmptyDirectoryException(f.toString());
}
if (status.isEmptyDirectory()) {
if (status.isEmptyDirectory() == Tristate.TRUE) {
LOG.debug("Deleting fake empty directory {}", key);
// HADOOP-13761 S3Guard: retries here
deleteObject(key);
metadataStore.delete(f);
instrumentation.directoryDeleted();
} else {
LOG.debug("Getting objects for directory prefix {} to delete", key);
@ -1349,6 +1475,7 @@ public class S3AFileSystem extends FileSystem {
LOG.debug("Got object to delete {}", summary.getKey());
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
// TODO: HADOOP-13761 S3Guard: retries
removeKeys(keys, true, false);
}
}
@ -1357,16 +1484,19 @@ public class S3AFileSystem extends FileSystem {
objects = continueListObjects(objects);
} else {
if (!keys.isEmpty()) {
// TODO: HADOOP-13761 S3Guard: retries
removeKeys(keys, false, false);
}
break;
}
}
}
metadataStore.deleteSubtree(f);
} else {
LOG.debug("delete: Path is a file");
instrumentation.fileDeleted(1);
deleteObject(key);
metadataStore.delete(f);
}
Path parent = f.getParent();
@ -1390,7 +1520,7 @@ public class S3AFileSystem extends FileSystem {
private boolean rejectRootDirectoryDelete(S3AFileStatus status,
boolean recursive) throws IOException {
LOG.info("s3a delete the {} root directory of {}", bucket, recursive);
boolean emptyRoot = status.isEmptyDirectory();
boolean emptyRoot = status.isEmptyDirectory() == Tristate.TRUE;
if (emptyRoot) {
return true;
}
@ -1405,7 +1535,7 @@ public class S3AFileSystem extends FileSystem {
private void createFakeDirectoryIfNecessary(Path f)
throws IOException, AmazonClientException {
String key = pathToKey(f);
if (!key.isEmpty() && !exists(f)) {
if (!key.isEmpty() && !s3Exists(f)) {
LOG.debug("Creating new fake directory at {}", f);
createFakeDirectory(key);
}
@ -1454,6 +1584,11 @@ public class S3AFileSystem extends FileSystem {
key = key + '/';
}
DirListingMetadata dirMeta = metadataStore.listChildren(path);
if (allowAuthoritative && dirMeta != null && dirMeta.isAuthoritative()) {
return S3Guard.dirMetaToStatuses(dirMeta);
}
ListObjectsRequest request = createListObjectsRequest(key, "/");
LOG.debug("listStatus: doing listObjects for directory {}", key);
@ -1466,7 +1601,8 @@ public class S3AFileSystem extends FileSystem {
while (files.hasNext()) {
result.add(files.next());
}
return result.toArray(new FileStatus[result.size()]);
return S3Guard.dirListingUnion(metadataStore, path, result, dirMeta,
allowAuthoritative);
} else {
LOG.debug("Adding: rd (not a dir): {}", path);
FileStatus[] stats = new FileStatus[1];
@ -1482,7 +1618,8 @@ public class S3AFileSystem extends FileSystem {
* @param delimiter any delimiter
* @return the request
*/
private ListObjectsRequest createListObjectsRequest(String key,
@VisibleForTesting
ListObjectsRequest createListObjectsRequest(String key,
String delimiter) {
ListObjectsRequest request = new ListObjectsRequest();
request.setBucketName(bucket);
@ -1541,23 +1678,30 @@ public class S3AFileSystem extends FileSystem {
throw translateException("innerMkdirs", path, e);
}
}
/**
*
* Make the given path and all non-existent parents into
* directories.
* See {@link #mkdirs(Path, FsPermission)}
* @param f path to create
* @param p path to create
* @param permission to apply to f
* @return true if a directory was created
* @return true if a directory was created or already existed
* @throws FileAlreadyExistsException there is a file at the path specified
* @throws IOException other IO problems
* @throws AmazonClientException on failures inside the AWS SDK
*/
private boolean innerMkdirs(Path f, FsPermission permission)
private boolean innerMkdirs(Path p, FsPermission permission)
throws IOException, FileAlreadyExistsException, AmazonClientException {
Path f = qualify(p);
LOG.debug("Making directory: {}", f);
incrementStatistic(INVOCATION_MKDIRS);
FileStatus fileStatus;
List<Path> metadataStoreDirs = null;
if (hasMetadataStore()) {
metadataStoreDirs = new ArrayList<>();
}
try {
fileStatus = getFileStatus(f);
@ -1567,8 +1711,12 @@ public class S3AFileSystem extends FileSystem {
throw new FileAlreadyExistsException("Path is a file: " + f);
}
} catch (FileNotFoundException e) {
// Walk path to root, ensuring closest ancestor is a directory, not file
Path fPart = f.getParent();
do {
if (metadataStoreDirs != null) {
metadataStoreDirs.add(f);
}
while (fPart != null) {
try {
fileStatus = getFileStatus(fPart);
if (fileStatus.isDirectory()) {
@ -1581,12 +1729,17 @@ public class S3AFileSystem extends FileSystem {
}
} catch (FileNotFoundException fnfe) {
instrumentation.errorIgnored();
// We create all missing directories in MetadataStore; it does not
// infer directories exist by prefix like S3.
if (metadataStoreDirs != null) {
metadataStoreDirs.add(fPart);
}
}
fPart = fPart.getParent();
} while (fPart != null);
}
String key = pathToKey(f);
createFakeDirectory(key);
S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username, true);
// this is complicated because getParent(a/b/c/) returns a/b/c, but
// we want a/b. See HADOOP-14428 for more details.
deleteUnnecessaryFakeDirectories(new Path(f.toString()).getParent());
@ -1598,21 +1751,93 @@ public class S3AFileSystem extends FileSystem {
* Return a file status object that represents the path.
* @param f The path we want information from
* @return a FileStatus object
* @throws java.io.FileNotFoundException when the path does not exist;
* @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
*/
public S3AFileStatus getFileStatus(final Path f) throws IOException {
public FileStatus getFileStatus(final Path f) throws IOException {
return innerGetFileStatus(f, false);
}
/**
* Internal version of {@link #getFileStatus(Path)}.
* @param f The path we want information from
* @param needEmptyDirectoryFlag if true, implementation will calculate
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
* @return a S3AFileStatus object
* @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
*/
@VisibleForTesting
S3AFileStatus innerGetFileStatus(final Path f,
boolean needEmptyDirectoryFlag) throws IOException {
incrementStatistic(INVOCATION_GET_FILE_STATUS);
final Path path = qualify(f);
String key = pathToKey(path);
LOG.debug("Getting path status for {} ({})", path , key);
LOG.debug("Getting path status for {} ({})", path, key);
// Check MetadataStore, if any.
PathMetadata pm = metadataStore.get(path, needEmptyDirectoryFlag);
Set<Path> tombstones = Collections.EMPTY_SET;
if (pm != null) {
if (pm.isDeleted()) {
throw new FileNotFoundException("Path " + f + " is recorded as " +
"deleted by S3Guard");
}
FileStatus msStatus = pm.getFileStatus();
if (needEmptyDirectoryFlag && msStatus.isDirectory()) {
if (pm.isEmptyDirectory() != Tristate.UNKNOWN) {
// We have a definitive true / false from MetadataStore, we are done.
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
} else {
DirListingMetadata children = metadataStore.listChildren(path);
if (children != null) {
tombstones = children.listTombstones();
}
LOG.debug("MetadataStore doesn't know if dir is empty, using S3.");
}
} else {
// Either this is not a directory, or we don't care if it is empty
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
}
// If the metadata store has no children for it and it's not listed in
// S3 yet, assume the directory is empty.
S3AFileStatus s3FileStatus;
try {
s3FileStatus = s3GetFileStatus(path, key, tombstones);
} catch (FileNotFoundException e) {
return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
}
// entry was found, save in S3Guard
return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
} else {
// there was no entry in S3Guard
// retrieve the data and update the metadata store in the process.
return S3Guard.putAndReturn(metadataStore,
s3GetFileStatus(path, key, tombstones), instrumentation);
}
}
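The lookup order above, metadata store first with raw S3 as the fallback, reduces to the following sketch; tombstone handling and the empty-directory probe are omitted, and all types here are hypothetical.

import java.io.FileNotFoundException;
import java.util.Optional;
import java.util.function.Function;

// Sketch: consult the fast cache first (e.g. a metadata store), and only
// fall back to the slower backing lookup (e.g. raw S3 HEAD/LIST).
class CachedLookup<K, V> {
  private final Function<K, Optional<V>> cache;
  private final Function<K, Optional<V>> backing;

  CachedLookup(Function<K, Optional<V>> cache, Function<K, Optional<V>> backing) {
    this.cache = cache;
    this.backing = backing;
  }

  V get(K key) throws FileNotFoundException {
    Optional<V> cached = cache.apply(key);
    if (cached.isPresent()) {
      return cached.get();                    // served from the cache
    }
    return backing.apply(key)
        .orElseThrow(() -> new FileNotFoundException("No such entry: " + key));
  }

  public static void main(String[] args) throws Exception {
    CachedLookup<String, String> lookup = new CachedLookup<>(
        k -> k.equals("cached") ? Optional.of("from-cache") : Optional.empty(),
        k -> k.equals("remote") ? Optional.of("from-backing") : Optional.empty());
    System.out.println(lookup.get("cached")); // from-cache
    System.out.println(lookup.get("remote")); // from-backing
  }
}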
/**
* Raw {@code getFileStatus} that talks direct to S3.
* Used to implement {@link #innerGetFileStatus(Path, boolean)},
* and for direct management of empty directory blobs.
* @param path Qualified path
* @param key Key string for the path
* @return Status
* @throws FileNotFoundException when the path does not exist
* @throws IOException on other problems.
*/
private S3AFileStatus s3GetFileStatus(final Path path, String key,
Set<Path> tombstones) throws IOException {
if (!key.isEmpty()) {
try {
ObjectMetadata meta = getObjectMetadata(key);
if (objectRepresentsDirectory(key, meta.getContentLength())) {
LOG.debug("Found exact file: fake directory");
return new S3AFileStatus(true, path, username);
return new S3AFileStatus(Tristate.TRUE, path, username);
} else {
LOG.debug("Found exact file: normal file");
return new S3AFileStatus(meta.getContentLength(),
@ -1637,16 +1862,16 @@ public class S3AFileSystem extends FileSystem {
if (objectRepresentsDirectory(newKey, meta.getContentLength())) {
LOG.debug("Found file (with /): fake directory");
return new S3AFileStatus(true, path, username);
return new S3AFileStatus(Tristate.TRUE, path, username);
} else {
LOG.warn("Found file (with /): real file? should not happen: {}",
key);
return new S3AFileStatus(meta.getContentLength(),
dateToLong(meta.getLastModified()),
path,
getDefaultBlockSize(path),
username);
dateToLong(meta.getLastModified()),
path,
getDefaultBlockSize(path),
username);
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 404) {
@ -1668,25 +1893,26 @@ public class S3AFileSystem extends FileSystem {
ObjectListing objects = listObjects(request);
if (!objects.getCommonPrefixes().isEmpty()
|| !objects.getObjectSummaries().isEmpty()) {
Collection<String> prefixes = objects.getCommonPrefixes();
Collection<S3ObjectSummary> summaries = objects.getObjectSummaries();
if (!isEmptyOfKeys(prefixes, tombstones) ||
!isEmptyOfObjects(summaries, tombstones)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Found path as directory (with /): {}/{}",
objects.getCommonPrefixes().size() ,
objects.getObjectSummaries().size());
prefixes.size(), summaries.size());
for (S3ObjectSummary summary : objects.getObjectSummaries()) {
for (S3ObjectSummary summary : summaries) {
LOG.debug("Summary: {} {}", summary.getKey(), summary.getSize());
}
for (String prefix : objects.getCommonPrefixes()) {
for (String prefix : prefixes) {
LOG.debug("Prefix: {}", prefix);
}
}
return new S3AFileStatus(false, path, username);
return new S3AFileStatus(Tristate.FALSE, path, username);
} else if (key.isEmpty()) {
LOG.debug("Found root directory");
return new S3AFileStatus(true, path, username);
return new S3AFileStatus(Tristate.TRUE, path, username);
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 404) {
@ -1700,6 +1926,64 @@ public class S3AFileSystem extends FileSystem {
throw new FileNotFoundException("No such file or directory: " + path);
}
/**
* Helper function to determine if a collection of paths is empty
* after accounting for tombstone markers (if provided).
* @param keys Collection of paths (prefixes / directories or keys).
* @param tombstones Set of tombstone markers, or null if not applicable.
* @return false if the collection contains keys not accounted for by
* tombstones.
*/
private boolean isEmptyOfKeys(Collection<String> keys, Set<Path>
tombstones) {
if (tombstones == null) {
return keys.isEmpty();
}
for (String key : keys) {
Path qualified = keyToQualifiedPath(key);
if (!tombstones.contains(qualified)) {
return false;
}
}
return true;
}
/**
* Helper function to determine if a collection of object summaries is empty
* after accounting for tombstone markers (if provided).
* @param summaries Collection of objects as returned by listObjects.
* @param tombstones Set of tombstone markers, or null if not applicable.
* @return false if summaries contains objects not accounted for by
* tombstones.
*/
private boolean isEmptyOfObjects(Collection<S3ObjectSummary> summaries,
Set<Path> tombstones) {
if (tombstones == null) {
return summaries.isEmpty();
}
Collection<String> stringCollection = new ArrayList<>(summaries.size());
for (S3ObjectSummary summary : summaries) {
stringCollection.add(summary.getKey());
}
return isEmptyOfKeys(stringCollection, tombstones);
}
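The same tombstone-aware emptiness test, reduced to plain strings as an illustrative sketch rather than the patch's code.

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

// Sketch: a listing is "effectively empty" if every returned key is
// covered by a tombstone marker for a recently deleted path.
final class EmptinessCheck {
  static boolean isEmptyOf(Collection<String> keys, Set<String> tombstones) {
    if (tombstones == null) {
      return keys.isEmpty();
    }
    for (String key : keys) {
      if (!tombstones.contains(key)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Set<String> tombstones = new HashSet<>(Collections.singletonList("dir/old"));
    System.out.println(isEmptyOf(Arrays.asList("dir/old"), tombstones));            // true
    System.out.println(isEmptyOf(Arrays.asList("dir/old", "dir/new"), tombstones)); // false
  }

  private EmptinessCheck() {
  }
}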
/**
* Raw version of {@link FileSystem#exists(Path)} which uses S3 only:
* S3Guard MetadataStore, if any, will be skipped.
* @param f path to check
* @return true if path exists in S3
* @throws IOException on any failure probing S3
*/
private boolean s3Exists(final Path f) throws IOException {
Path path = qualify(f);
String key = pathToKey(path);
try {
s3GetFileStatus(path, key, null);
return true;
} catch (FileNotFoundException e) {
return false;
}
}
/**
* The src file is on the local disk. Add it to FS at
* the given dst name.
@ -1777,12 +2061,13 @@ public class S3AFileSystem extends FileSystem {
final String key = pathToKey(dst);
final ObjectMetadata om = newObjectMetadata(srcfile.length());
PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcfile);
Upload up = putObject(putObjectRequest);
UploadInfo info = putObject(putObjectRequest);
Upload upload = info.getUpload();
ProgressableProgressListener listener = new ProgressableProgressListener(
this, key, up, null);
up.addProgressListener(listener);
this, key, upload, null);
upload.addProgressListener(listener);
try {
up.waitForUploadResult();
upload.waitForUploadResult();
} catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted copying " + src
+ " to " + dst + ", cancelling");
@ -1790,7 +2075,7 @@ public class S3AFileSystem extends FileSystem {
listener.uploadCompleted();
// This will delete unnecessary fake parent directories
finishedWrite(key);
finishedWrite(key, info.getLength());
if (delSrc) {
local.delete(src, false);
@ -1814,6 +2099,10 @@ public class S3AFileSystem extends FileSystem {
transfers.shutdownNow(true);
transfers = null;
}
if (metadataStore != null) {
metadataStore.close();
metadataStore = null;
}
}
}
@ -1956,11 +2245,38 @@ public class S3AFileSystem extends FileSystem {
/**
* Perform post-write actions.
* This operation MUST be called after any PUT/multipart PUT completes
* successfully.
* This includes
* <ol>
* <li>Calling {@link #deleteUnnecessaryFakeDirectories(Path)}</li>
* <li>Updating any metadata store with details on the newly created
* object.</li>
* </ol>
* @param key key written to
* @param length total length of file written
*/
public void finishedWrite(String key) {
LOG.debug("Finished write to {}", key);
deleteUnnecessaryFakeDirectories(keyToPath(key).getParent());
@InterfaceAudience.Private
void finishedWrite(String key, long length) {
LOG.debug("Finished write to {}, len {}", key, length);
Path p = keyToQualifiedPath(key);
deleteUnnecessaryFakeDirectories(p.getParent());
Preconditions.checkArgument(length >= 0, "content length is negative");
// See note about failure semantics in S3Guard documentation
try {
if (hasMetadataStore()) {
S3Guard.addAncestors(metadataStore, p, username);
S3AFileStatus status = createUploadFileStatus(p,
S3AUtils.objectRepresentsDirectory(key, length), length,
getDefaultBlockSize(p), username);
S3Guard.putAndReturn(metadataStore, status, instrumentation);
}
} catch (IOException e) {
LOG.error("S3Guard: Error updating MetadataStore for write to {}:",
key, e);
instrumentation.errorIgnored();
}
}
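A sketch of the best-effort update pattern above: the metadata store write is attempted only after the object store write has succeeded, and a failure is logged rather than rethrown. The MetadataStore interface below is a hypothetical stand-in.

import java.io.IOException;

// Sketch: a failure to update the metadata store after a successful PUT
// is logged and swallowed, so the caller still sees the write succeed.
class PostWriteUpdate {
  interface MetadataStore {
    void put(String key, long length) throws IOException;
  }

  static void finishedWrite(MetadataStore store, String key, long length) {
    try {
      store.put(key, length);
    } catch (IOException e) {
      System.err.println("Metadata update failed for " + key + ": " + e);
      // error is ignored; the data is already durable in the object store
    }
  }

  public static void main(String[] args) {
    finishedWrite((key, len) -> { throw new IOException("simulated outage"); },
        "bucket/data.bin", 42L);
  }
}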
/**
@ -2015,9 +2331,9 @@ public class S3AFileSystem extends FileSystem {
PutObjectRequest putObjectRequest = newPutObjectRequest(objectName,
newObjectMetadata(0L),
im);
Upload upload = putObject(putObjectRequest);
UploadInfo info = putObject(putObjectRequest);
try {
upload.waitForUploadResult();
info.getUpload().waitForUploadResult();
} catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted creating " + objectName);
}
@ -2123,6 +2439,8 @@ public class S3AFileSystem extends FileSystem {
if (blockFactory != null) {
sb.append(", blockFactory=").append(blockFactory);
}
sb.append(", metastore=").append(metadataStore);
sb.append(", authoritative=").append(allowAuthoritative);
sb.append(", boundedExecutor=").append(boundedThreadPool);
sb.append(", unboundedExecutor=").append(unboundedThreadPool);
sb.append(", statistics {")
@ -2241,6 +2559,18 @@ public class S3AFileSystem extends FileSystem {
@Override
public RemoteIterator<LocatedFileStatus> listFiles(Path f,
boolean recursive) throws FileNotFoundException, IOException {
return innerListFiles(f, recursive,
new Listing.AcceptFilesOnly(qualify(f)));
}
public RemoteIterator<LocatedFileStatus> listFilesAndEmptyDirectories(Path f,
boolean recursive) throws IOException {
return innerListFiles(f, recursive,
new Listing.AcceptAllButS3nDirs());
}
private RemoteIterator<LocatedFileStatus> innerListFiles(Path f, boolean
recursive, Listing.FileStatusAcceptor acceptor) throws IOException {
incrementStatistic(INVOCATION_LIST_FILES);
Path path = qualify(f);
LOG.debug("listFiles({}, {})", path, recursive);
@ -2258,13 +2588,42 @@ public class S3AFileSystem extends FileSystem {
String delimiter = recursive ? null : "/";
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
return listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, delimiter),
ACCEPT_ALL,
new Listing.AcceptFilesOnly(path)));
final RemoteIterator<FileStatus> cachedFilesIterator;
final Set<Path> tombstones;
if (recursive) {
final PathMetadata pm = metadataStore.get(path, true);
// shouldn't need to check pm.isDeleted() because that will have
// been caught by getFileStatus above.
MetadataStoreListFilesIterator metadataStoreListFilesIterator =
new MetadataStoreListFilesIterator(metadataStore, pm,
allowAuthoritative);
tombstones = metadataStoreListFilesIterator.listTombstones();
cachedFilesIterator = metadataStoreListFilesIterator;
} else {
DirListingMetadata meta = metadataStore.listChildren(path);
if (meta != null) {
tombstones = meta.listTombstones();
} else {
tombstones = null;
}
cachedFilesIterator = listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
// metadata listing is authoritative, so return it directly
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
}
return listing.createTombstoneReconcilingIterator(
listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, delimiter),
ACCEPT_ALL,
acceptor,
cachedFilesIterator)),
tombstones);
}
} catch (AmazonClientException e) {
// TODO S3Guard: retry on file not found exception
throw translateException("listFiles", path, e);
}
}
@ -2309,12 +2668,21 @@ public class S3AFileSystem extends FileSystem {
filter.accept(path) ? toLocatedFileStatus(fileStatus) : null);
} else {
// directory: trigger a lookup
String key = maybeAddTrailingSlash(pathToKey(path));
return listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, "/"),
filter,
new Listing.AcceptAllButSelfAndS3nDirs(path)));
final String key = maybeAddTrailingSlash(pathToKey(path));
final Listing.FileStatusAcceptor acceptor =
new Listing.AcceptAllButSelfAndS3nDirs(path);
DirListingMetadata meta = metadataStore.listChildren(path);
final RemoteIterator<FileStatus> cachedFileStatusIterator =
listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
return (allowAuthoritative && meta != null && meta.isAuthoritative())
? listing.createLocatedFileStatusIterator(cachedFileStatusIterator)
: listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, "/"),
filter,
acceptor,
cachedFileStatusIterator));
}
} catch (AmazonClientException e) {
throw translateException("listLocatedStatus", path, e);
@ -2389,8 +2757,8 @@ public class S3AFileSystem extends FileSystem {
/**
* Callback on a successful write.
*/
void writeSuccessful() {
finishedWrite(key);
void writeSuccessful(long length) {
finishedWrite(key, length);
}
/**

View File

@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.metrics2.MetricStringBuilder;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.Interns;
@ -30,6 +31,7 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.metrics2.lib.MutableMetric;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import java.io.Closeable;
import java.net.URI;
@ -38,7 +40,6 @@ import java.util.Map;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.fs.FileSystem.Statistics;
import static org.apache.hadoop.fs.s3a.Statistic.*;
@ -90,6 +91,10 @@ public class S3AInstrumentation {
private final Map<String, MutableCounterLong> streamMetrics =
new HashMap<>(30);
/** Instantiate this without caring whether or not S3Guard is enabled. */
private final S3GuardInstrumentation s3GuardInstrumentation
= new S3GuardInstrumentation();
private static final Statistic[] COUNTERS_TO_CREATE = {
INVOCATION_COPY_FROM_LOCAL_FILE,
INVOCATION_EXISTS,
@ -117,6 +122,8 @@ public class S3AInstrumentation {
STREAM_WRITE_BLOCK_UPLOADS_ABORTED,
STREAM_WRITE_TOTAL_TIME,
STREAM_WRITE_TOTAL_DATA,
S3GUARD_METADATASTORE_PUT_PATH_REQUEST,
S3GUARD_METADATASTORE_INITIALIZATION
};
@ -171,6 +178,9 @@ public class S3AInstrumentation {
for (Statistic statistic : GAUGES_TO_CREATE) {
gauge(statistic.getSymbol(), statistic.getDescription());
}
// TODO: make the quantiles reporting interval configurable?
quantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
"ops", "latency", 1);
}
/**
@ -226,6 +236,22 @@ public class S3AInstrumentation {
return registry.newGauge(name, desc, 0L);
}
/**
* Create a quantiles metric in the registry.
* @param op statistic to collect
* @param sampleName sample name of the quantiles
* @param valueName value name of the quantiles
* @param interval interval of the quantiles in seconds
* @return the created quantiles metric
*/
protected final MutableQuantiles quantiles(Statistic op,
String sampleName,
String valueName,
int interval) {
return registry.newQuantiles(op.getSymbol(), op.getDescription(),
sampleName, valueName, interval);
}
/**
* Get the metrics registry.
* @return the registry
@ -310,6 +336,20 @@ public class S3AInstrumentation {
return (MutableGaugeLong) metric;
}
/**
* Look up a quantiles metric.
* @param name quantiles name
* @return the quantiles or null
* @throws ClassCastException if the metric is not a Quantiles.
*/
public MutableQuantiles lookupQuantiles(String name) {
MutableMetric metric = lookupMetric(name);
if (metric == null) {
LOG.debug("No quantiles {}", name);
}
return (MutableQuantiles) metric;
}
/**
* Look up a metric from both the registered set and the lighter weight
* stream entries.
@ -391,6 +431,21 @@ public class S3AInstrumentation {
counter.incr(count);
}
}
/**
* Add a value to a quantiles statistic. No-op if the quantiles metric
* isn't found.
* @param op operation to look up.
* @param value value to add.
* @throws ClassCastException if the metric is not a Quantiles.
*/
public void addValueToQuantiles(Statistic op, long value) {
MutableQuantiles quantiles = lookupQuantiles(op.getSymbol());
if (quantiles != null) {
quantiles.add(value);
}
}
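As a rough illustration of what such a quantiles statistic reports, the plain-Java sketch below records latency samples and reads off a percentile; this is not the Hadoop metrics2 implementation.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Sketch: record latency samples, then read off a percentile over the
// collected window, as a quantiles metric would summarise them.
class LatencyQuantilesSketch {
  private final List<Long> samples = new ArrayList<>();

  void add(long latencyMillis) {
    samples.add(latencyMillis);
  }

  long percentile(double p) {
    List<Long> sorted = new ArrayList<>(samples);
    Collections.sort(sorted);
    int index = (int) Math.ceil(p * sorted.size()) - 1;
    return sorted.get(Math.max(index, 0));
  }

  public static void main(String[] args) {
    LatencyQuantilesSketch q = new LatencyQuantilesSketch();
    for (long l : new long[]{12, 15, 9, 40, 22}) {
      q.add(l);
    }
    System.out.println("p50=" + q.percentile(0.50) + "ms, p90="
        + q.percentile(0.90) + "ms");
  }
}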
/**
* Increment a specific counter.
* No-op if not defined.
@ -441,6 +496,15 @@ public class S3AInstrumentation {
return new InputStreamStatistics();
}
/**
* Create an S3Guard instrumentation instance.
* There's likely to be at most one instance of this per FS instance.
* @return the S3Guard instrumentation point.
*/
public S3GuardInstrumentation getS3GuardInstrumentation() {
return s3GuardInstrumentation;
}
/**
* Merge in the statistics of a single input stream into
* the filesystem-wide statistics.
@ -840,4 +904,19 @@ public class S3AInstrumentation {
return sb.toString();
}
}
/**
* Instrumentation exported to S3Guard.
*/
public final class S3GuardInstrumentation {
/** Initialized event. */
public void initialized() {
incrementCounter(S3GUARD_METADATASTORE_INITIALIZATION, 1);
}
public void storeClosed() {
}
}
}

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.transfer.Upload;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -101,19 +100,20 @@ public class S3AOutputStream extends OutputStream {
try {
final ObjectMetadata om = fs.newObjectMetadata(backupFile.length());
Upload upload = fs.putObject(
UploadInfo info = fs.putObject(
fs.newPutObjectRequest(
key,
om,
backupFile));
ProgressableProgressListener listener =
new ProgressableProgressListener(fs, key, upload, progress);
upload.addProgressListener(listener);
new ProgressableProgressListener(fs, key, info.getUpload(), progress);
info.getUpload().addProgressListener(listener);
upload.waitForUploadResult();
info.getUpload().waitForUploadResult();
listener.uploadCompleted();
// This will delete unnecessary fake parent directories
fs.finishedWrite(key);
// This will delete unnecessary fake parent directories, update any
// MetadataStore
fs.finishedWrite(key, info.getLength());
} catch (InterruptedException e) {
throw (InterruptedIOException) new InterruptedIOException(e.toString())
.initCause(e);

View File

@ -294,12 +294,38 @@ public final class S3AUtils {
S3ObjectSummary summary,
long blockSize,
String owner) {
if (objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
return new S3AFileStatus(true, keyPath, owner);
long size = summary.getSize();
return createFileStatus(keyPath,
objectRepresentsDirectory(summary.getKey(), size),
size, summary.getLastModified(), blockSize, owner);
}
/**
* Create a file status for an object we just uploaded. For files, we use
* current time as modification time, since s3a uses S3's service-based
* modification time, which will not be available until we do a
* getFileStatus() later on.
* @param keyPath path for created object
* @param isDir true iff directory
* @param size file length
* @param blockSize block size for file status
* @param owner Hadoop username
* @return a status entry
*/
public static S3AFileStatus createUploadFileStatus(Path keyPath,
boolean isDir, long size, long blockSize, String owner) {
Date date = isDir ? null : new Date();
return createFileStatus(keyPath, isDir, size, date, blockSize, owner);
}
/* Date 'modified' is ignored when isDir is true. */
private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir,
long size, Date modified, long blockSize, String owner) {
if (isDir) {
return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner);
} else {
return new S3AFileStatus(summary.getSize(),
dateToLong(summary.getLastModified()), keyPath,
blockSize, owner);
return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize,
owner);
}
}

View File

@ -18,33 +18,20 @@
package org.apache.hadoop.fs.s3a;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import java.io.IOException;
import java.net.URI;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.VersionInfo;
import org.slf4j.Logger;
/**
* Factory for creation of S3 client instances to be used by {@link S3Store}.
* Factory for creation of {@link AmazonS3} client instances.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
interface S3ClientFactory {
public interface S3ClientFactory {
/**
* Creates a new {@link AmazonS3} client. This method accepts the S3A file
@ -57,177 +44,4 @@ interface S3ClientFactory {
*/
AmazonS3 createS3Client(URI name) throws IOException;
/**
* The default factory implementation, which calls the AWS SDK to configure
* and create an {@link AmazonS3Client} that communicates with the S3 service.
*/
static class DefaultS3ClientFactory extends Configured
implements S3ClientFactory {
private static final Logger LOG = S3AFileSystem.LOG;
@Override
public AmazonS3 createS3Client(URI name) throws IOException {
Configuration conf = getConf();
AWSCredentialsProvider credentials =
createAWSCredentialProviderSet(name, conf);
ClientConfiguration awsConf = new ClientConfiguration();
initConnectionSettings(conf, awsConf);
initProxySupport(conf, awsConf);
initUserAgent(conf, awsConf);
return createAmazonS3Client(conf, credentials, awsConf);
}
/**
* Initializes all AWS SDK settings related to connection management.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*/
private static void initConnectionSettings(Configuration conf,
ClientConfiguration awsConf) {
awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
DEFAULT_MAXIMUM_CONNECTIONS, 1));
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
DEFAULT_SECURE_CONNECTIONS);
awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
DEFAULT_MAX_ERROR_RETRIES, 0));
awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
DEFAULT_ESTABLISH_TIMEOUT, 0));
awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
DEFAULT_SOCKET_TIMEOUT, 0));
int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
DEFAULT_SOCKET_SEND_BUFFER, 2048);
int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
DEFAULT_SOCKET_RECV_BUFFER, 2048);
awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
if (!signerOverride.isEmpty()) {
LOG.debug("Signer override = {}", signerOverride);
awsConf.setSignerOverride(signerOverride);
}
}
/**
* Initializes AWS SDK proxy support if configured.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
* @throws IllegalArgumentException if misconfigured
*/
private static void initProxySupport(Configuration conf,
ClientConfiguration awsConf) throws IllegalArgumentException {
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
int proxyPort = conf.getInt(PROXY_PORT, -1);
if (!proxyHost.isEmpty()) {
awsConf.setProxyHost(proxyHost);
if (proxyPort >= 0) {
awsConf.setProxyPort(proxyPort);
} else {
if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
awsConf.setProxyPort(443);
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
awsConf.setProxyPort(80);
}
}
String proxyUsername = conf.getTrimmed(PROXY_USERNAME);
String proxyPassword = conf.getTrimmed(PROXY_PASSWORD);
if ((proxyUsername == null) != (proxyPassword == null)) {
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
PROXY_PASSWORD + " set without the other.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
awsConf.setProxyUsername(proxyUsername);
awsConf.setProxyPassword(proxyPassword);
awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
if (LOG.isDebugEnabled()) {
LOG.debug("Using proxy server {}:{} as user {} with password {} on " +
"domain {} as workstation {}", awsConf.getProxyHost(),
awsConf.getProxyPort(),
String.valueOf(awsConf.getProxyUsername()),
awsConf.getProxyPassword(), awsConf.getProxyDomain(),
awsConf.getProxyWorkstation());
}
} else if (proxyPort >= 0) {
String msg =
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
}
/**
* Initializes the User-Agent header to send in HTTP requests to the S3
* back-end. We always include the Hadoop version number. The user also
* may set an optional custom prefix to put in front of the Hadoop version
* number. The AWS SDK internally appends its own information, which seems
* to include the AWS SDK version, OS and JVM version.
*
* @param conf Hadoop configuration
* @param awsConf AWS SDK configuration
*/
private static void initUserAgent(Configuration conf,
ClientConfiguration awsConf) {
String userAgent = "Hadoop " + VersionInfo.getVersion();
String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
if (!userAgentPrefix.isEmpty()) {
userAgent = userAgentPrefix + ", " + userAgent;
}
LOG.debug("Using User-Agent: {}", userAgent);
awsConf.setUserAgentPrefix(userAgent);
}
/**
* Creates an {@link AmazonS3Client} from the established configuration.
*
* @param conf Hadoop configuration
* @param credentials AWS credentials
* @param awsConf AWS SDK configuration
* @return S3 client
* @throws IllegalArgumentException if misconfigured
*/
private static AmazonS3 createAmazonS3Client(Configuration conf,
AWSCredentialsProvider credentials, ClientConfiguration awsConf)
throws IllegalArgumentException {
AmazonS3 s3 = new AmazonS3Client(credentials, awsConf);
String endPoint = conf.getTrimmed(ENDPOINT, "");
if (!endPoint.isEmpty()) {
try {
s3.setEndpoint(endPoint);
} catch (IllegalArgumentException e) {
String msg = "Incorrect endpoint: " + e.getMessage();
LOG.error(msg);
throw new IllegalArgumentException(msg, e);
}
}
enablePathStyleAccessIfRequired(s3, conf);
return s3;
}
/**
* Enables path-style access to S3 buckets if configured. By default, the
* behavior is to use virtual hosted-style access with URIs of the form
* http://bucketname.s3.amazonaws.com. Enabling path-style access and a
* region-specific endpoint switches the behavior to use URIs of the form
* http://s3-eu-west-1.amazonaws.com/bucketname.
*
* @param s3 S3 client
* @param conf Hadoop configuration
*/
private static void enablePathStyleAccessIfRequired(AmazonS3 s3,
Configuration conf) {
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
if (pathStyleAccess) {
LOG.debug("Enabling path style access!");
s3.setS3ClientOptions(S3ClientOptions.builder()
.setPathStyleAccess(true)
.build());
}
}
}
}
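As a small illustration of the User-Agent composition documented in initUserAgent() above (the factory is relocated to a top-level DefaultS3ClientFactory in this commit, as referenced later by DynamoDBClientFactory), the following sketch, not part of the commit, shows the effective header value when a prefix is configured. The class name UserAgentExample is hypothetical.

// Illustrative sketch only: reproduces the prefix + Hadoop version rule
// from initUserAgent(); the class name is hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.VersionInfo;

import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX;

public class UserAgentExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(USER_AGENT_PREFIX, "MyApplication");

    String userAgent = "Hadoop " + VersionInfo.getVersion();
    String prefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
    if (!prefix.isEmpty()) {
      userAgent = prefix + ", " + userAgent;
    }
    // Prints e.g. "MyApplication, Hadoop 3.0.0-beta1"; the AWS SDK later
    // appends its own SDK, OS and JVM details when the header is sent.
    System.out.println(userAgent);
  }
}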

View File

@ -140,7 +140,18 @@ public enum Statistic {
STREAM_WRITE_TOTAL_DATA("stream_write_total_data",
"Count of total data uploaded in block output"),
STREAM_WRITE_QUEUE_DURATION("stream_write_queue_duration",
"Total queue duration of all block uploads");
"Total queue duration of all block uploads"),
// S3Guard stats
S3GUARD_METADATASTORE_PUT_PATH_REQUEST(
"s3guard_metadatastore_put_path_request",
"s3guard metadata store put one metadata path request"),
S3GUARD_METADATASTORE_PUT_PATH_LATENCY(
"s3guard_metadatastore_put_path_latency",
"s3guard metadata store put one metadata path lantency"),
S3GUARD_METADATASTORE_INITIALIZATION("s3guard_metadatastore_initialization",
"s3guard metadata store initialization times");
private static final Map<String, Statistic> SYMBOL_MAP =
new HashMap<>(Statistic.values().length);

View File

@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
/**
* Simple enum to express {true, false, don't know}.
*/
public enum Tristate {
// Do not add additional values here. Logic will assume there are exactly
// three possibilities.
TRUE, FALSE, UNKNOWN;
public static Tristate fromBool(boolean v) {
return v ? TRUE : FALSE;
}
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.transfer.Upload;
/**
* Simple struct that contains information about an S3 upload.
*/
public class UploadInfo {
private final Upload upload;
private final long length;
public UploadInfo(Upload upload, long length) {
this.upload = upload;
this.length = length;
}
public Upload getUpload() {
return upload;
}
public long getLength() {
return length;
}
}

View File

@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
/**
* {@code DescendantsIterator} is a {@link RemoteIterator} that implements
* pre-ordering breadth-first traversal (BFS) of a path and all of its
* descendants recursively. After visiting each path, that path's direct
* children are discovered by calling {@link MetadataStore#listChildren(Path)}.
* Each iteration returns the next direct child, and if that child is a
* directory, also pushes it onto a queue to discover its children later.
*
* For example, assume the consistent store contains metadata representing this
* file system structure:
*
* <pre>
* /dir1
* |-- dir2
* | |-- file1
* | `-- file2
* `-- dir3
* |-- dir4
* | `-- file3
* |-- dir5
* | `-- file4
* `-- dir6
* </pre>
*
* Consider this code sample:
* <pre>
* final PathMetadata dir1 = get(new Path("/dir1"));
* for (DescendantsIterator descendants = new DescendantsIterator(dir1);
* descendants.hasNext(); ) {
* final FileStatus status = descendants.next().getFileStatus();
* System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
* status.getPath());
* }
* </pre>
*
* The output is:
* <pre>
* D /dir1
* D /dir1/dir2
* D /dir1/dir3
* F /dir1/dir2/file1
* F /dir1/dir2/file2
* D /dir1/dir3/dir4
* D /dir1/dir3/dir5
* F /dir1/dir3/dir4/file3
* F /dir1/dir3/dir5/file4
* D /dir1/dir3/dir6
* </pre>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class DescendantsIterator implements RemoteIterator<FileStatus> {
private final MetadataStore metadataStore;
private final Queue<PathMetadata> queue = new LinkedList<>();
/**
* Creates a new {@code DescendantsIterator}.
*
* @param ms the associated {@link MetadataStore}
* @param meta base path for descendants iteration, which will be the first
* returned during iteration (except root). Null makes empty iterator.
* @throws IOException if errors happen during metadata store listing
*/
public DescendantsIterator(MetadataStore ms, PathMetadata meta)
throws IOException {
Preconditions.checkNotNull(ms);
this.metadataStore = ms;
if (meta != null) {
final Path path = meta.getFileStatus().getPath();
if (path.isRoot()) {
DirListingMetadata rootListing = ms.listChildren(path);
if (rootListing != null) {
rootListing = rootListing.withoutTombstones();
queue.addAll(rootListing.getListing());
}
} else {
queue.add(meta);
}
}
}
@Override
public boolean hasNext() throws IOException {
return !queue.isEmpty();
}
@Override
public FileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException("No more descendants.");
}
PathMetadata next;
next = queue.poll();
if (next.getFileStatus().isDirectory()) {
final Path path = next.getFileStatus().getPath();
DirListingMetadata meta = metadataStore.listChildren(path);
if (meta != null) {
Collection<PathMetadata> more = meta.withoutTombstones().getListing();
if (!more.isEmpty()) {
queue.addAll(more);
}
}
}
return next.getFileStatus();
}
}

View File

@ -0,0 +1,322 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
/**
* {@code DirListingMetadata} models a directory listing stored in a
* {@link MetadataStore}. Instances of this class are mutable and thread-safe.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class DirListingMetadata {
/**
* Convenience parameter for passing into constructor.
*/
public static final Collection<PathMetadata> EMPTY_DIR =
Collections.emptyList();
private final Path path;
/** Using a map for fast find / remove with large directories. */
private Map<Path, PathMetadata> listMap = new ConcurrentHashMap<>();
private boolean isAuthoritative;
/**
* Create a directory listing metadata container.
*
* @param path Path of the directory. If this path has a host component, then
* all paths added later via {@link #put(FileStatus)} must also have
* the same host.
* @param listing Entries in the directory.
* @param isAuthoritative true iff listing is the full contents of the
* directory, and the calling client reports that this may be cached as
* the full and authoritative listing of all files in the directory.
*/
public DirListingMetadata(Path path, Collection<PathMetadata> listing,
boolean isAuthoritative) {
checkPathAbsolute(path);
this.path = path;
if (listing != null) {
for (PathMetadata entry : listing) {
Path childPath = entry.getFileStatus().getPath();
checkChildPath(childPath);
listMap.put(childPath, entry);
}
}
this.isAuthoritative = isAuthoritative;
}
/**
* Copy constructor.
* @param d the existing {@link DirListingMetadata} object.
*/
public DirListingMetadata(DirListingMetadata d) {
path = d.path;
isAuthoritative = d.isAuthoritative;
listMap = new ConcurrentHashMap<>(d.listMap);
}
/**
* @return {@code Path} of the directory that contains this listing.
*/
public Path getPath() {
return path;
}
/**
* @return entries in the directory
*/
public Collection<PathMetadata> getListing() {
return Collections.unmodifiableCollection(listMap.values());
}
public Set<Path> listTombstones() {
Set<Path> tombstones = new HashSet<>();
for (PathMetadata meta : listMap.values()) {
if (meta.isDeleted()) {
tombstones.add(meta.getFileStatus().getPath());
}
}
return tombstones;
}
public DirListingMetadata withoutTombstones() {
Collection<PathMetadata> filteredList = new ArrayList<>();
for (PathMetadata meta : listMap.values()) {
if (!meta.isDeleted()) {
filteredList.add(meta);
}
}
return new DirListingMetadata(path, filteredList, isAuthoritative);
}
/**
* @return number of entries tracked. This is not the same as the number
* of entries in the actual directory unless {@link #isAuthoritative()} is
* true.
*/
public int numEntries() {
return listMap.size();
}
/**
* @return true iff this directory listing is full and authoritative within
* the scope of the {@code MetadataStore} that returned it.
*/
public boolean isAuthoritative() {
return isAuthoritative;
}
/**
* Is the underlying directory known to be empty?
* @return FALSE if directory is known to have a child entry, TRUE if
* directory is known to be empty, UNKNOWN otherwise.
*/
public Tristate isEmpty() {
if (getListing().isEmpty()) {
if (isAuthoritative()) {
return Tristate.TRUE;
} else {
// This listing is empty, but may not be full list of underlying dir.
return Tristate.UNKNOWN;
}
} else { // not empty listing
// There exists at least one child, dir not empty.
return Tristate.FALSE;
}
}
/**
* Marks this directory listing as full and authoritative.
* @param authoritative see {@link #isAuthoritative()}.
*/
public void setAuthoritative(boolean authoritative) {
this.isAuthoritative = authoritative;
}
/**
* Lookup entry within this directory listing. This may return null if the
* {@code MetadataStore} only tracks a partial set of the directory entries.
* In the case where {@link #isAuthoritative()} is true, however, this
* function returns null iff the directory is known not to contain an entry
* at the given path (within the scope of the {@code MetadataStore} that returned
* it).
*
* @param childPath path of entry to look for.
* @return entry, or null if it is not present or not being tracked.
*/
public PathMetadata get(Path childPath) {
checkChildPath(childPath);
return listMap.get(childPath);
}
/**
* Replace an entry with a tombstone.
* @param childPath path of entry to replace.
*/
public void markDeleted(Path childPath) {
checkChildPath(childPath);
listMap.put(childPath, PathMetadata.tombstone(childPath));
}
/**
* Remove entry from this directory.
*
* @param childPath path of entry to remove.
*/
public void remove(Path childPath) {
checkChildPath(childPath);
listMap.remove(childPath);
}
/**
* Add an entry to the directory listing. If this listing already contains a
* {@code FileStatus} with the same path, it will be replaced.
*
* @param childFileStatus entry to add to this directory listing.
* @return true if the status was added or replaced with a new value. False
* if the same FileStatus value was already present.
*/
public boolean put(FileStatus childFileStatus) {
Preconditions.checkNotNull(childFileStatus,
"childFileStatus must be non-null");
Path childPath = childStatusToPathKey(childFileStatus);
PathMetadata newValue = new PathMetadata(childFileStatus);
PathMetadata oldValue = listMap.put(childPath, newValue);
return oldValue == null || !oldValue.equals(newValue);
}
@Override
public String toString() {
return "DirListingMetadata{" +
"path=" + path +
", listMap=" + listMap +
", isAuthoritative=" + isAuthoritative +
'}';
}
/**
* Log contents to supplied StringBuilder in a pretty fashion.
* @param sb target StringBuilder
*/
public void prettyPrint(StringBuilder sb) {
sb.append(String.format("DirMeta %-20s %-18s",
path.toString(),
isAuthoritative ? "Authoritative" : "Not Authoritative"));
for (Map.Entry<Path, PathMetadata> entry : listMap.entrySet()) {
sb.append("\n key: ").append(entry.getKey()).append(": ");
entry.getValue().prettyPrint(sb);
}
sb.append("\n");
}
public String prettyPrint() {
StringBuilder sb = new StringBuilder();
prettyPrint(sb);
return sb.toString();
}
/**
* Checks that child path is valid.
* @param childPath path to check.
*/
private void checkChildPath(Path childPath) {
checkPathAbsolute(childPath);
// If this dir's path has host (and thus scheme), so must its children
URI parentUri = path.toUri();
if (parentUri.getHost() != null) {
URI childUri = childPath.toUri();
Preconditions.checkNotNull(childUri.getHost(), "Expected non-null URI " +
"host: %s", childUri);
Preconditions.checkArgument(
childUri.getHost().equals(parentUri.getHost()),
"childUri %s and parentUri %s must have the same host",
childUri, parentUri);
Preconditions.checkNotNull(childUri.getScheme(), "No scheme in path %s",
childUri);
}
Preconditions.checkArgument(!childPath.isRoot(),
"childPath cannot be the root path: %s", childPath);
Preconditions.checkArgument(childPath.getParent().equals(path),
"childPath %s must be a child of %s", childPath, path);
}
/**
* For Paths that are handed in directly, we assert they are in consistent
* format with checkPath(). For paths that are supplied embedded in
* FileStatus, we attempt to fill in missing scheme and host, when this
* DirListingMetadata is associated with one.
*
* @return Path suitable for consistent hashtable lookups
* @throws NullPointerException null status argument
* @throws IllegalArgumentException bad status values or failure to
* create a URI.
*/
private Path childStatusToPathKey(FileStatus status) {
Path p = status.getPath();
Preconditions.checkNotNull(p, "Child status' path cannot be null");
Preconditions.checkArgument(!p.isRoot(),
"childPath cannot be the root path: %s", p);
Preconditions.checkArgument(p.getParent().equals(path),
"childPath %s must be a child of %s", p, path);
URI uri = p.toUri();
URI parentUri = path.toUri();
// If FileStatus' path is missing host, but should have one, add it.
if (uri.getHost() == null && parentUri.getHost() != null) {
try {
return new Path(new URI(parentUri.getScheme(), parentUri.getHost(),
uri.getPath(), uri.getFragment()));
} catch (URISyntaxException e) {
throw new IllegalArgumentException("FileStatus path invalid with" +
" added " + parentUri.getScheme() + "://" + parentUri.getHost() +
" added", e);
}
}
return p;
}
private void checkPathAbsolute(Path p) {
Preconditions.checkNotNull(p, "path must be non-null");
Preconditions.checkArgument(p.isAbsolute(), "path must be absolute: %s", p);
}
}
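A minimal usage sketch of DirListingMetadata (not part of the commit; the class name and bucket path are illustrative), showing how the emptiness tristate and tombstones behave:

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;

public class DirListingMetadataExample {
  public static void main(String[] args) {
    Path dir = new Path("s3a://bucket/dir");
    DirListingMetadata listing =
        new DirListingMetadata(dir, DirListingMetadata.EMPTY_DIR, false);
    // No entries tracked and not authoritative: emptiness is unknown.
    System.out.println(listing.isEmpty());                        // UNKNOWN

    FileStatus child = new FileStatus(1024, false, 1, 1, 0,
        new Path(dir, "file1"));
    listing.put(child);
    // At least one live child is tracked, so the directory is not empty.
    System.out.println(listing.isEmpty());                        // FALSE

    listing.markDeleted(child.getPath());   // replace the entry with a tombstone
    System.out.println(listing.withoutTombstones().numEntries()); // 0
  }
}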

View File

@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.google.common.base.Preconditions;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
/**
* Interface to create a DynamoDB client.
*
* Implementations must be configurable, so that their configuration can be set and retrieved.
*/
@InterfaceAudience.Private
public interface DynamoDBClientFactory extends Configurable {
Logger LOG = LoggerFactory.getLogger(DynamoDBClientFactory.class);
/**
* Create a DynamoDB client object from configuration.
*
* The DynamoDB client to create does not have to relate to any S3 buckets.
* All information needed to create a DynamoDB client is from the hadoop
* configuration. Specifically, if the region is not configured, it will use the
* provided region parameter. If region is neither configured nor provided,
* it will indicate an error.
*
* @param defaultRegion the default region of the AmazonDynamoDB client
* @return a new DynamoDB client
* @throws IOException if any IO error happens
*/
AmazonDynamoDB createDynamoDBClient(String defaultRegion) throws IOException;
/**
* The default implementation for creating an AmazonDynamoDB.
*/
class DefaultDynamoDBClientFactory extends Configured
implements DynamoDBClientFactory {
@Override
public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
throws IOException {
Preconditions.checkNotNull(getConf(),
"Should have been configured before usage");
final Configuration conf = getConf();
final AWSCredentialsProvider credentials =
createAWSCredentialProviderSet(null, conf);
final ClientConfiguration awsConf =
DefaultS3ClientFactory.createAwsConf(conf);
final String region = getRegion(conf, defaultRegion);
LOG.debug("Creating DynamoDB client in region {}", region);
return AmazonDynamoDBClientBuilder.standard()
.withCredentials(credentials)
.withClientConfiguration(awsConf)
.withRegion(region)
.build();
}
/**
* Helper method to get and validate the AWS region for DynamoDBClient.
*
* @param conf configuration
* @param defaultRegion the default region
* @return configured region or else the provided default region
* @throws IOException if the region is not valid
*/
static String getRegion(Configuration conf, String defaultRegion)
throws IOException {
String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
if (StringUtils.isEmpty(region)) {
region = defaultRegion;
}
try {
Regions.fromName(region);
} catch (IllegalArgumentException | NullPointerException e) {
throw new IOException("Invalid region specified: " + region + "; " +
"Region can be configured with " + S3GUARD_DDB_REGION_KEY + ": " +
validRegionsString());
}
return region;
}
private static String validRegionsString() {
final String delimiter = ", ";
Regions[] regions = Regions.values();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < regions.length; i++) {
if (i > 0) {
sb.append(delimiter);
}
sb.append(regions[i].getName());
}
return sb.toString();
}
}
}
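A minimal sketch (not part of the commit) of the region resolution rule documented on createDynamoDBClient(): an explicit setting of the S3GUARD_DDB_REGION_KEY option wins over the caller-supplied default, and an unknown region name is rejected with an IOException. The example class sits in the s3guard package because getRegion() is package-private; its name is hypothetical.

// Illustrative sketch only; the class name is hypothetical.
package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;

public class RegionResolutionExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Nothing configured: fall back to the default supplied by the caller.
    System.out.println(DynamoDBClientFactory.DefaultDynamoDBClientFactory
        .getRegion(conf, "us-west-2"));                           // us-west-2

    conf.set(S3GUARD_DDB_REGION_KEY, "eu-west-1");
    // Explicit configuration takes precedence over the default.
    System.out.println(DynamoDBClientFactory.DefaultDynamoDBClientFactory
        .getRegion(conf, "us-west-2"));                           // eu-west-1
  }
}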

View File

@ -0,0 +1,435 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
/**
* This is a local, in-memory, implementation of MetadataStore.
* This is <i>not</i> a coherent cache across processes. It is only
* locally-coherent.
*
* The purpose of this is for unit and integration testing.
* It could also be used to accelerate local-only operations where only one
* process is operating on a given object store, or multiple processes are
* accessing a read-only storage bucket.
*
* This MetadataStore does not enforce filesystem rules such as disallowing
* non-recursive removal of non-empty directories. It is assumed that the
* caller already performs these checks.
*/
public class LocalMetadataStore implements MetadataStore {
public static final Logger LOG = LoggerFactory.getLogger(MetadataStore.class);
// TODO HADOOP-13649: use time instead of capacity for eviction.
public static final int DEFAULT_MAX_RECORDS = 128;
/**
* Maximum number of records.
*/
public static final String CONF_MAX_RECORDS =
"fs.metadatastore.local.max_records";
/** Contains directories and files. */
private LruHashMap<Path, PathMetadata> fileHash;
/** Contains directory listings. */
private LruHashMap<Path, DirListingMetadata> dirHash;
private FileSystem fs;
/* Null iff this FS does not have an associated URI host. */
private String uriHost;
@Override
public void initialize(FileSystem fileSystem) throws IOException {
Preconditions.checkNotNull(fileSystem);
fs = fileSystem;
URI fsURI = fs.getUri();
uriHost = fsURI.getHost();
if (uriHost != null && uriHost.equals("")) {
uriHost = null;
}
initialize(fs.getConf());
}
@Override
public void initialize(Configuration conf) throws IOException {
Preconditions.checkNotNull(conf);
int maxRecords = conf.getInt(CONF_MAX_RECORDS, DEFAULT_MAX_RECORDS);
if (maxRecords < 4) {
maxRecords = 4;
}
// Start w/ less than max capacity. Space / time trade off.
fileHash = new LruHashMap<>(maxRecords/2, maxRecords);
dirHash = new LruHashMap<>(maxRecords/4, maxRecords);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(
"LocalMetadataStore{");
sb.append(", uriHost='").append(uriHost).append('\'');
sb.append('}');
return sb.toString();
}
@Override
public void delete(Path p) throws IOException {
doDelete(p, false, true);
}
@Override
public void forgetMetadata(Path p) throws IOException {
doDelete(p, false, false);
}
@Override
public void deleteSubtree(Path path) throws IOException {
doDelete(path, true, true);
}
private synchronized void doDelete(Path p, boolean recursive, boolean
tombstone) {
Path path = standardize(p);
// Delete entry from file cache, then from cached parent directory, if any
deleteHashEntries(path, tombstone);
if (recursive) {
// Remove all entries that have this dir as path prefix.
deleteHashByAncestor(path, dirHash, tombstone);
deleteHashByAncestor(path, fileHash, tombstone);
}
}
@Override
public synchronized PathMetadata get(Path p) throws IOException {
return get(p, false);
}
@Override
public PathMetadata get(Path p, boolean wantEmptyDirectoryFlag)
throws IOException {
Path path = standardize(p);
synchronized (this) {
PathMetadata m = fileHash.mruGet(path);
if (wantEmptyDirectoryFlag && m != null &&
m.getFileStatus().isDirectory()) {
m.setIsEmptyDirectory(isEmptyDirectory(p));
}
LOG.debug("get({}) -> {}", path, m == null ? "null" : m.prettyPrint());
return m;
}
}
/**
* Determine if directory is empty.
* Call with lock held.
* @param p a Path, already filtered through standardize()
* @return TRUE / FALSE if known empty / not-empty, UNKNOWN otherwise.
*/
private Tristate isEmptyDirectory(Path p) {
DirListingMetadata dirMeta = dirHash.get(p);
return dirMeta.withoutTombstones().isEmpty();
}
@Override
public synchronized DirListingMetadata listChildren(Path p) throws
IOException {
Path path = standardize(p);
DirListingMetadata listing = dirHash.mruGet(path);
if (LOG.isDebugEnabled()) {
LOG.debug("listChildren({}) -> {}", path,
listing == null ? "null" : listing.prettyPrint());
}
// Make a copy so callers can mutate without affecting our state
return listing == null ? null : new DirListingMetadata(listing);
}
@Override
public void move(Collection<Path> pathsToDelete,
Collection<PathMetadata> pathsToCreate) throws IOException {
Preconditions.checkNotNull(pathsToDelete, "pathsToDelete is null");
Preconditions.checkNotNull(pathsToCreate, "pathsToCreate is null");
Preconditions.checkArgument(pathsToDelete.size() == pathsToCreate.size(),
"Must supply same number of paths to delete/create.");
// I feel dirty for using reentrant lock. :-|
synchronized (this) {
// 1. Delete pathsToDelete
for (Path meta : pathsToDelete) {
LOG.debug("move: deleting metadata {}", meta);
delete(meta);
}
// 2. Create new destination path metadata
for (PathMetadata meta : pathsToCreate) {
LOG.debug("move: adding metadata {}", meta);
put(meta);
}
// 3. We now know full contents of all dirs in destination subtree
for (PathMetadata meta : pathsToCreate) {
FileStatus status = meta.getFileStatus();
if (status == null || status.isDirectory()) {
continue;
}
DirListingMetadata dir = listChildren(status.getPath());
if (dir != null) { // could be evicted already
dir.setAuthoritative(true);
}
}
}
}
@Override
public void put(PathMetadata meta) throws IOException {
Preconditions.checkNotNull(meta);
FileStatus status = meta.getFileStatus();
Path path = standardize(status.getPath());
synchronized (this) {
/* Add entry for this file. */
if (LOG.isDebugEnabled()) {
LOG.debug("put {} -> {}", path, meta.prettyPrint());
}
fileHash.put(path, meta);
/* Directory case:
* We also make sure we have an entry in the dirHash, so subsequent
* listStatus(path) at least see the directory.
*
* If we had a boolean flag argument "isNew", we would know whether this
* is an existing directory the client discovered via getFileStatus(),
* or if it is a newly-created directory. In the latter case, we would
* be able to mark the directory as authoritative (fully-cached),
* saving round trips to underlying store for subsequent listStatus()
*/
if (status.isDirectory()) {
DirListingMetadata dir = dirHash.mruGet(path);
if (dir == null) {
dirHash.put(path, new DirListingMetadata(path, DirListingMetadata
.EMPTY_DIR, false));
}
}
/* Update cached parent dir. */
Path parentPath = path.getParent();
if (parentPath != null) {
DirListingMetadata parent = dirHash.mruGet(parentPath);
if (parent == null) {
/* Track this new file's listing in parent. Parent is not
* authoritative, since there may be other items in it we don't know
* about. */
parent = new DirListingMetadata(parentPath,
DirListingMetadata.EMPTY_DIR, false);
dirHash.put(parentPath, parent);
}
parent.put(status);
}
}
}
@Override
public synchronized void put(DirListingMetadata meta) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("put dirMeta {}", meta.prettyPrint());
}
dirHash.put(standardize(meta.getPath()), meta);
}
public synchronized void put(Collection<PathMetadata> metas) throws
IOException {
for (PathMetadata meta : metas) {
put(meta);
}
}
@Override
public void close() throws IOException {
}
@Override
public void destroy() throws IOException {
if (dirHash != null) {
dirHash.clear();
}
}
@Override
public synchronized void prune(long modTime) throws IOException {
Iterator<Map.Entry<Path, PathMetadata>> files =
fileHash.entrySet().iterator();
while (files.hasNext()) {
Map.Entry<Path, PathMetadata> entry = files.next();
if (expired(entry.getValue().getFileStatus(), modTime)) {
files.remove();
}
}
Iterator<Map.Entry<Path, DirListingMetadata>> dirs =
dirHash.entrySet().iterator();
while (dirs.hasNext()) {
Map.Entry<Path, DirListingMetadata> entry = dirs.next();
Path path = entry.getKey();
DirListingMetadata metadata = entry.getValue();
Collection<PathMetadata> oldChildren = metadata.getListing();
Collection<PathMetadata> newChildren = new LinkedList<>();
for (PathMetadata child : oldChildren) {
FileStatus status = child.getFileStatus();
if (!expired(status, modTime)) {
newChildren.add(child);
}
}
if (newChildren.size() != oldChildren.size()) {
dirHash.put(path, new DirListingMetadata(path, newChildren, false));
if (!path.isRoot()) {
DirListingMetadata parent = dirHash.get(path.getParent());
if (parent != null) {
parent.setAuthoritative(false);
}
}
}
}
}
private boolean expired(FileStatus status, long expiry) {
// Note: S3 doesn't track modification time on directories, so for
// consistency with the DynamoDB implementation we ignore that here
return status.getModificationTime() < expiry && !status.isDirectory();
}
@VisibleForTesting
static <T> void deleteHashByAncestor(Path ancestor, Map<Path, T> hash,
boolean tombstone) {
for (Iterator<Map.Entry<Path, T>> it = hash.entrySet().iterator();
it.hasNext();) {
Map.Entry<Path, T> entry = it.next();
Path f = entry.getKey();
T meta = entry.getValue();
if (isAncestorOf(ancestor, f)) {
if (tombstone) {
if (meta instanceof PathMetadata) {
entry.setValue((T) PathMetadata.tombstone(f));
} else if (meta instanceof DirListingMetadata) {
it.remove();
} else {
throw new IllegalStateException("Unknown type in hash");
}
} else {
it.remove();
}
}
}
}
/**
* @return true iff 'ancestor' is ancestor dir in path 'f'.
* All paths here are absolute. Dir does not count as its own ancestor.
*/
private static boolean isAncestorOf(Path ancestor, Path f) {
String aStr = ancestor.toString();
if (!ancestor.isRoot()) {
aStr += "/";
}
String fStr = f.toString();
return (fStr.startsWith(aStr));
}
/**
* Update fileHash and dirHash to reflect deletion of file 'f'. Call with
* lock held.
*/
private void deleteHashEntries(Path path, boolean tombstone) {
// Remove target file/dir
LOG.debug("delete file entry for {}", path);
if (tombstone) {
fileHash.put(path, PathMetadata.tombstone(path));
} else {
fileHash.remove(path);
}
// Update this and parent dir listing, if any
/* If this path is a dir, remove its listing */
LOG.debug("removing listing of {}", path);
dirHash.remove(path);
/* Remove this path from parent's dir listing */
Path parent = path.getParent();
if (parent != null) {
DirListingMetadata dir = dirHash.get(parent);
if (dir != null) {
LOG.debug("removing parent's entry for {} ", path);
if (tombstone) {
dir.markDeleted(path);
} else {
dir.remove(path);
}
}
}
}
/**
* Return a "standardized" version of a path so we always have a consistent
* hash value. Also asserts the path is absolute, and contains host
* component.
* @param p input Path
* @return standardized version of Path, suitable for hash key
*/
private Path standardize(Path p) {
Preconditions.checkArgument(p.isAbsolute(), "Path must be absolute");
URI uri = p.toUri();
if (uriHost != null) {
Preconditions.checkArgument(!isEmpty(uri.getHost()));
}
return p;
}
private static boolean isEmpty(String s) {
return (s == null || s.isEmpty());
}
}
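A minimal sketch (not part of the commit; the class name and paths are illustrative) exercising LocalMetadataStore through Configuration-based initialization. The paths carry no host component, which the store accepts since uriHost stays null:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;

public class LocalMetadataStoreExample {
  public static void main(String[] args) throws Exception {
    LocalMetadataStore ms = new LocalMetadataStore();
    ms.initialize(new Configuration());

    Path file = new Path("/dir1/file1");
    ms.put(new PathMetadata(new FileStatus(100, false, 1, 1, 0, file)));

    // The file itself is tracked...
    System.out.println(ms.get(file).prettyPrint());
    // ...and its parent listing now contains it, though that listing is not
    // authoritative: other children may exist in the underlying store.
    DirListingMetadata parent = ms.listChildren(new Path("/dir1"));
    System.out.println(parent.prettyPrint());

    ms.delete(file);                               // leaves a tombstone
    System.out.println(ms.get(file).isDeleted());  // true
    ms.close();
  }
}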

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* LinkedHashMap that implements a maximum size and LRU eviction policy.
*/
public class LruHashMap<K, V> extends LinkedHashMap<K, V> {
private final int maxSize;
public LruHashMap(int initialCapacity, int maxSize) {
super(initialCapacity);
this.maxSize = maxSize;
}
@Override
protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > maxSize;
}
/**
* get() plus side-effect of making the element Most Recently Used.
* @param key lookup key
* @return value
*/
public V mruGet(K key) {
V val = remove(key);
if (val != null) {
put(key, val);
}
return val;
}
}
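A small sketch (not part of the commit; the class name is illustrative) of the eviction behaviour: mruGet() re-inserts the entry, so it is no longer the eldest when the map grows past maxSize:

import org.apache.hadoop.fs.s3a.s3guard.LruHashMap;

public class LruHashMapExample {
  public static void main(String[] args) {
    LruHashMap<String, Integer> map = new LruHashMap<>(2, 3);
    map.put("a", 1);
    map.put("b", 2);
    map.put("c", 3);
    map.mruGet("a");          // "a" is re-inserted, so "b" is now the eldest
    map.put("d", 4);          // size exceeds maxSize of 3: evict the eldest
    System.out.println(map.containsKey("a"));  // true
    System.out.println(map.containsKey("b"));  // false, "b" was evicted
  }
}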

View File

@ -0,0 +1,221 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* {@code MetadataStore} defines the set of operations that any metadata store
* implementation must provide. Note that all {@link Path} objects provided
* to methods must be absolute, not relative paths.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface MetadataStore extends Closeable {
/**
* Performs one-time initialization of the metadata store.
*
* @param fs {@code FileSystem} associated with the MetadataStore
* @throws IOException if there is an error
*/
void initialize(FileSystem fs) throws IOException;
/**
* Performs one-time initialization of the metadata store via configuration.
* @see #initialize(FileSystem)
* @param conf Configuration.
* @throws IOException if there is an error
*/
void initialize(Configuration conf) throws IOException;
/**
* Deletes exactly one path, leaving a tombstone to prevent lingering,
* inconsistent copies of it from being listed.
*
* @param path the path to delete
* @throws IOException if there is an error
*/
void delete(Path path) throws IOException;
/**
* Removes the record of exactly one path. Does not leave a tombstone (see
* {@link MetadataStore#delete(Path)}. It is currently intended for testing
* only, and a need to use it as part of normal FileSystem usage is not
* anticipated.
*
* @param path the path to delete
* @throws IOException if there is an error
*/
@VisibleForTesting
void forgetMetadata(Path path) throws IOException;
/**
* Deletes the entire sub-tree rooted at the given path, leaving tombstones
* to prevent lingering, inconsistent copies of it from being listed.
*
* In addition to affecting future calls to {@link #get(Path)},
* implementations must also update any stored {@code DirListingMetadata}
* objects which track the parent of this file.
*
* @param path the root of the sub-tree to delete
* @throws IOException if there is an error
*/
void deleteSubtree(Path path) throws IOException;
/**
* Gets metadata for a path.
*
* @param path the path to get
* @return metadata for {@code path}, {@code null} if not found
* @throws IOException if there is an error
*/
PathMetadata get(Path path) throws IOException;
/**
* Gets metadata for a path. Alternate method that includes a hint
* whether or not the MetadataStore should do work to compute the value for
* {@link PathMetadata#isEmptyDirectory()}. Since determining emptiness
* may be an expensive operation, this can save wasted work.
*
* @param path the path to get
* @param wantEmptyDirectoryFlag Set to true to give a hint to the
* MetadataStore that it should try to compute the empty directory flag.
* @return metadata for {@code path}, {@code null} if not found
* @throws IOException if there is an error
*/
PathMetadata get(Path path, boolean wantEmptyDirectoryFlag)
throws IOException;
/**
* Lists metadata for all direct children of a path.
*
* @param path the path to list
* @return metadata for all direct children of {@code path} which are being
* tracked by the MetadataStore, or {@code null} if the path was not found
* in the MetadataStore.
* @throws IOException if there is an error
*/
DirListingMetadata listChildren(Path path) throws IOException;
/**
* Record the effects of a {@link FileSystem#rename(Path, Path)} in the
* MetadataStore. Clients provide explicit enumeration of the affected
* paths (recursively), before and after the rename.
*
* This operation is not atomic, unless specific implementations claim
* otherwise.
*
* On the need to provide an enumeration of directory trees instead of just
* source and destination paths:
* Since a MetadataStore does not have to track all metadata for the
* underlying storage system, and a new MetadataStore may be created on an
* existing underlying filesystem, this move() may be the first time the
* MetadataStore sees the affected paths. Therefore, simply providing src
* and destination paths may not be enough to record the deletions (under
* src path) and creations (at destination) that are happening during the
* rename().
*
* @param pathsToDelete Collection of all paths that were removed from the
* source directory tree of the move.
* @param pathsToCreate Collection of all PathMetadata for the new paths
* that were created at the destination of the rename().
* @throws IOException if there is an error
*/
void move(Collection<Path> pathsToDelete,
Collection<PathMetadata> pathsToCreate) throws IOException;
/**
* Saves metadata for exactly one path.
*
* Implementations may pre-create all the path's ancestors automatically.
* Implementations must update any {@code DirListingMetadata} objects which
* track the immediate parent of this file.
*
* @param meta the metadata to save
* @throws IOException if there is an error
*/
void put(PathMetadata meta) throws IOException;
/**
* Saves metadata for any number of paths.
*
* Semantics are otherwise the same as single-path puts.
*
* @param metas the metadata to save
* @throws IOException if there is an error
*/
void put(Collection<PathMetadata> metas) throws IOException;
/**
* Save directory listing metadata. Callers may save a partial directory
* listing for a given path, or may store a complete and authoritative copy
* of the directory listing. {@code MetadataStore} implementations may
* subsequently keep track of all modifications to the directory contents at
* this path, and return authoritative results from subsequent calls to
* {@link #listChildren(Path)}. See {@link DirListingMetadata}.
*
* Any authoritative results returned are only authoritative for the scope
* of the {@code MetadataStore}: A per-process {@code MetadataStore}, for
* example, would only show results visible to that process, potentially
* missing metadata updates (create, delete) made to the same path by
* another process.
*
* @param meta Directory listing metadata.
* @throws IOException if there is an error
*/
void put(DirListingMetadata meta) throws IOException;
/**
* Destroy all resources associated with the metadata store.
*
* The destroyed resources can be DynamoDB tables, MySQL databases/tables, or
* HDFS directories. Any operations after calling this method may possibly
* fail.
*
* This operation is idempotent.
*
* @throws IOException if there is an error
*/
void destroy() throws IOException;
/**
* Clear any metadata older than a specified time from the repository.
* Implementations MUST clear file metadata, and MAY clear directory metadata
* (s3a itself does not track modification time for directories).
* Implementations may also choose to throw UnsupportedOperationException
* instead. Note that modification times should be in UTC, as returned by
* System.currentTimeMillis at the time of modification.
*
* @param modTime Oldest modification time to allow
* @throws IOException if there is an error
* @throws UnsupportedOperationException if not implemented
*/
void prune(long modTime) throws IOException, UnsupportedOperationException;
}
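A minimal sketch (not part of the commit; the class name and paths are illustrative) of recording a rename through move(), supplying the explicit before/after enumeration the contract above calls for, here against the in-memory LocalMetadataStore:

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;

public class MetadataStoreMoveExample {
  public static void main(String[] args) throws Exception {
    MetadataStore ms = new LocalMetadataStore();
    ms.initialize(new Configuration());

    Path src = new Path("/src/file1");
    Path dest = new Path("/dest/file1");
    ms.put(new PathMetadata(new FileStatus(10, false, 1, 1, 0, src)));

    // The caller enumerates every path removed and every entry created.
    ms.move(Arrays.asList(src),
        Arrays.asList(new PathMetadata(new FileStatus(10, false, 1, 1, 0, dest))));

    System.out.println(ms.get(src).isDeleted());  // true: tombstone at the source
    System.out.println(ms.get(dest) != null);     // true: destination recorded
    ms.close();
  }
}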

View File

@ -0,0 +1,169 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
/**
* {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
* is similar to {@code DescendantsIterator} but does not return directories
* that have (or may have) children, and will also provide access to the set of
* tombstones to allow recently deleted S3 objects to be filtered out from a
* corresponding request. In other words, it returns tombstones plus the set
* of objects that should exist directly in S3: files and empty directories,
* but not other directories whose existence is only inferred from them.
*
* For example, assume the consistent store contains metadata representing this
* file system structure:
*
* <pre>
* /dir1
* |-- dir2
* | |-- file1
* | `-- file2
* `-- dir3
* |-- dir4
* | `-- file3
* |-- dir5
* | `-- file4
* `-- dir6
* </pre>
*
* Consider this code sample:
* <pre>
* final PathMetadata dir1 = get(new Path("/dir1"));
* for (MetadataStoreListFilesIterator files =
* new MetadataStoreListFilesIterator(dir1); files.hasNext(); ) {
* final FileStatus status = files.next().getFileStatus();
* System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
* status.getPath());
* }
* </pre>
*
* The output is:
* <pre>
* F /dir1/dir2/file1
* F /dir1/dir2/file2
* F /dir1/dir3/dir4/file3
* F /dir1/dir3/dir5/file4
* D /dir1/dir3/dir6
* </pre>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class MetadataStoreListFilesIterator implements
RemoteIterator<FileStatus> {
public static final Logger LOG = LoggerFactory.getLogger(
MetadataStoreListFilesIterator.class);
private final boolean allowAuthoritative;
private final MetadataStore metadataStore;
private final Set<Path> tombstones = new HashSet<>();
private Iterator<FileStatus> leafNodesIterator = null;
public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
boolean allowAuthoritative) throws IOException {
Preconditions.checkNotNull(ms);
this.metadataStore = ms;
this.allowAuthoritative = allowAuthoritative;
prefetch(meta);
}
private void prefetch(PathMetadata meta) throws IOException {
final Queue<PathMetadata> queue = new LinkedList<>();
final Collection<FileStatus> leafNodes = new ArrayList<>();
if (meta != null) {
final Path path = meta.getFileStatus().getPath();
if (path.isRoot()) {
DirListingMetadata rootListing = metadataStore.listChildren(path);
if (rootListing != null) {
tombstones.addAll(rootListing.listTombstones());
queue.addAll(rootListing.withoutTombstones().getListing());
}
} else {
queue.add(meta);
}
}
while(!queue.isEmpty()) {
PathMetadata nextMetadata = queue.poll();
FileStatus nextStatus = nextMetadata.getFileStatus();
if (nextStatus.isFile()) {
// All files are leaf nodes by definition
leafNodes.add(nextStatus);
continue;
}
if (nextStatus.isDirectory()) {
final Path path = nextStatus.getPath();
DirListingMetadata children = metadataStore.listChildren(path);
if (children != null) {
tombstones.addAll(children.listTombstones());
Collection<PathMetadata> liveChildren =
children.withoutTombstones().getListing();
if (!liveChildren.isEmpty()) {
// If it's a directory, has children, not all deleted, then we
// add the children to the queue and move on to the next node
queue.addAll(liveChildren);
continue;
} else if (allowAuthoritative && children.isAuthoritative()) {
leafNodes.add(nextStatus);
}
}
}
// Directories that *might* be empty are ignored for now, since we
// cannot confirm that they are empty without incurring other costs.
// Users of this class can still discover empty directories via S3's
// fake directories, subject to the same consistency semantics as before.
// The only other possibility is a symlink, which is unsupported on S3A.
}
leafNodesIterator = leafNodes.iterator();
}
@Override
public boolean hasNext() {
return leafNodesIterator.hasNext();
}
@Override
public FileStatus next() {
return leafNodesIterator.next();
}
public Set<Path> listTombstones() {
return tombstones;
}
}

View File

@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.util.Collection;
/**
* A no-op implementation of MetadataStore. Clients that use this
* implementation should behave the same as they would without any
* MetadataStore.
*/
public class NullMetadataStore implements MetadataStore {
@Override
public void initialize(FileSystem fs) throws IOException {
}
@Override
public void initialize(Configuration conf) throws IOException {
}
@Override
public void close() throws IOException {
}
@Override
public void delete(Path path) throws IOException {
}
@Override
public void forgetMetadata(Path path) throws IOException {
}
@Override
public void deleteSubtree(Path path) throws IOException {
}
@Override
public PathMetadata get(Path path) throws IOException {
return null;
}
@Override
public PathMetadata get(Path path, boolean wantEmptyDirectoryFlag)
throws IOException {
return null;
}
@Override
public DirListingMetadata listChildren(Path path) throws IOException {
return null;
}
@Override
public void move(Collection<Path> pathsToDelete,
Collection<PathMetadata> pathsToCreate) throws IOException {
}
@Override
public void put(PathMetadata meta) throws IOException {
}
@Override
public void put(Collection<PathMetadata> meta) throws IOException {
}
@Override
public void put(DirListingMetadata meta) throws IOException {
}
@Override
public void destroy() throws IOException {
}
@Override
public void prune(long modTime) {
}
@Override
public String toString() {
return "NullMetadataStore";
}
}

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;
/**
* {@code PathMetadata} models path metadata stored in the
* {@link MetadataStore}.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class PathMetadata {
private final FileStatus fileStatus;
private Tristate isEmptyDirectory;
private boolean isDeleted;
/**
* Create a tombstone from the current time.
* @param path path to tombstone
* @return the entry.
*/
public static PathMetadata tombstone(Path path) {
long now = System.currentTimeMillis();
FileStatus status = new FileStatus(0, false, 0, 0, now, path);
return new PathMetadata(status, Tristate.UNKNOWN, true);
}
/**
* Creates a new {@code PathMetadata} containing given {@code FileStatus}.
* @param fileStatus file status containing an absolute path.
*/
public PathMetadata(FileStatus fileStatus) {
this(fileStatus, Tristate.UNKNOWN);
}
public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) {
this(fileStatus, isEmptyDir, false);
}
public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean
isDeleted) {
Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null");
Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" +
" non-null");
Preconditions.checkArgument(fileStatus.getPath().isAbsolute(), "path must" +
" be absolute");
this.fileStatus = fileStatus;
this.isEmptyDirectory = isEmptyDir;
this.isDeleted = isDeleted;
}
/**
* @return {@code FileStatus} contained in this {@code PathMetadata}.
*/
public final FileStatus getFileStatus() {
return fileStatus;
}
/**
* Query if a directory is empty.
* @return Tristate.TRUE if this is known to be an empty directory,
* Tristate.FALSE if known to not be empty, and Tristate.UNKNOWN if the
* MetadataStore does not have enough information to determine either way.
*/
public Tristate isEmptyDirectory() {
return isEmptyDirectory;
}
void setIsEmptyDirectory(Tristate isEmptyDirectory) {
this.isEmptyDirectory = isEmptyDirectory;
}
public boolean isDeleted() {
return isDeleted;
}
void setIsDeleted(boolean isDeleted) {
this.isDeleted = isDeleted;
}
@Override
public boolean equals(Object o) {
if (!(o instanceof PathMetadata)) {
return false;
}
return this.fileStatus.equals(((PathMetadata)o).fileStatus);
}
@Override
public int hashCode() {
return fileStatus.hashCode();
}
@Override
public String toString() {
return "PathMetadata{" +
"fileStatus=" + fileStatus +
"; isEmptyDirectory=" + isEmptyDirectory +
"; isDeleted=" + isDeleted +
'}';
}
/**
* Log contents to supplied StringBuilder in a pretty fashion.
* @param sb target StringBuilder
*/
public void prettyPrint(StringBuilder sb) {
sb.append(String.format("%-5s %-20s %-7d %-8s %-6s",
fileStatus.isDirectory() ? "dir" : "file",
fileStatus.getPath().toString(), fileStatus.getLen(),
isEmptyDirectory.name(), isDeleted));
sb.append(fileStatus);
}
public String prettyPrint() {
StringBuilder sb = new StringBuilder();
prettyPrint(sb);
return sb.toString();
}
}
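A small usage sketch may help here; it is illustrative only and not part of the change, and the bucket, paths and sizes below are invented.
// Illustrative only: build an entry for a file and a tombstone for a deleted
// path, using org.apache.hadoop.fs.FileStatus and Path as imported above.
FileStatus status = new FileStatus(1024, false, 1, 4096,
    System.currentTimeMillis(), new Path("s3a://example-bucket/data/part-0000"));
PathMetadata fileEntry = new PathMetadata(status);   // isEmptyDirectory == UNKNOWN
PathMetadata tombstone =
    PathMetadata.tombstone(new Path("s3a://example-bucket/data/deleted"));
StringBuilder sb = new StringBuilder();
fileEntry.prettyPrint(sb);                           // e.g. "file  s3a://...  1024 ..."
System.out.println(sb + " deleted=" + tombstone.isDeleted());  // deleted=true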

View File

@ -0,0 +1,304 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.Tristate;
/**
* Defines methods for translating between domain model objects and their
* representations in the DynamoDB schema.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
final class PathMetadataDynamoDBTranslation {
/** The HASH key name of each item. */
@VisibleForTesting
static final String PARENT = "parent";
/** The RANGE key name of each item. */
@VisibleForTesting
static final String CHILD = "child";
@VisibleForTesting
static final String IS_DIR = "is_dir";
@VisibleForTesting
static final String MOD_TIME = "mod_time";
@VisibleForTesting
static final String FILE_LENGTH = "file_length";
@VisibleForTesting
static final String BLOCK_SIZE = "block_size";
static final String IS_DELETED = "is_deleted";
/** Table version field {@value} in version marker item. */
@VisibleForTesting
static final String TABLE_VERSION = "table_version";
/** Table creation timestamp field {@value} in version marker item. */
@VisibleForTesting
static final String TABLE_CREATED = "table_created";
/** The version marker field is invalid. */
static final String E_NOT_VERSION_MARKER = "Not a version marker: ";
/**
* Returns the key schema for the DynamoDB table.
*
* @return DynamoDB key schema
*/
static Collection<KeySchemaElement> keySchema() {
return Arrays.asList(
new KeySchemaElement(PARENT, KeyType.HASH),
new KeySchemaElement(CHILD, KeyType.RANGE));
}
/**
* Returns the attribute definitions for the DynamoDB table.
*
* @return DynamoDB attribute definitions
*/
static Collection<AttributeDefinition> attributeDefinitions() {
return Arrays.asList(
new AttributeDefinition(PARENT, ScalarAttributeType.S),
new AttributeDefinition(CHILD, ScalarAttributeType.S));
}
/**
* Converts a DynamoDB item to a {@link PathMetadata}.
*
* @param item DynamoDB item to convert
* @return {@code item} converted to a {@link PathMetadata}
*/
static PathMetadata itemToPathMetadata(Item item, String username)
throws IOException {
if (item == null) {
return null;
}
String parentStr = item.getString(PARENT);
Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
String childStr = item.getString(CHILD);
Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
// Skip table version markers, which are the only non-absolute paths stored.
Path rawPath = new Path(parentStr, childStr);
if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
return null;
}
Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
Path path = new Path(parent, childStr);
boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
final FileStatus fileStatus;
if (isDir) {
fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username);
} else {
long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
username, username, path);
}
boolean isDeleted =
item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
}
/**
* Converts a {@link PathMetadata} to a DynamoDB item.
*
* @param meta {@link PathMetadata} to convert
* @return {@code meta} converted to DynamoDB item
*/
static Item pathMetadataToItem(PathMetadata meta) {
Preconditions.checkNotNull(meta);
final FileStatus status = meta.getFileStatus();
final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
if (status.isDirectory()) {
item.withBoolean(IS_DIR, true);
} else {
item.withLong(FILE_LENGTH, status.getLen())
.withLong(MOD_TIME, status.getModificationTime())
.withLong(BLOCK_SIZE, status.getBlockSize());
}
item.withBoolean(IS_DELETED, meta.isDeleted());
return item;
}
/**
* The version marker has a primary key whose PARENT is {@code name};
* this MUST NOT be a value which represents an absolute path.
* @param name name of the version marker
* @param version version number
* @param timestamp creation timestamp
* @return an item representing a version marker.
*/
static Item createVersionMarker(String name, int version, long timestamp) {
return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name))
.withInt(TABLE_VERSION, version)
.withLong(TABLE_CREATED, timestamp);
}
/**
* Create the primary key of the version marker.
* @param name key name
* @return the key to use when registering or resolving version markers
*/
static PrimaryKey createVersionMarkerPrimaryKey(String name) {
return new PrimaryKey(PARENT, name, CHILD, name);
}
/**
* Extract the version from a version marker item.
* @param marker version marker item
* @return the extracted version field
* @throws IOException if the item is not a version marker
*/
static int extractVersionFromMarker(Item marker) throws IOException {
if (marker.hasAttribute(TABLE_VERSION)) {
return marker.getInt(TABLE_VERSION);
} else {
throw new IOException(E_NOT_VERSION_MARKER + marker);
}
}
/**
* Extract the creation time, if present.
* @param marker version marker item
* @return the creation time, or null
* @throws IOException if the item is not a version marker
*/
static Long extractCreationTimeFromMarker(Item marker) throws IOException {
if (marker.hasAttribute(TABLE_CREATED)) {
return marker.getLong(TABLE_CREATED);
} else {
return null;
}
}
/**
 * Converts a collection of {@link PathMetadata} to a collection of DynamoDB items.
*
* @see #pathMetadataToItem(PathMetadata)
*/
static Item[] pathMetadataToItem(Collection<PathMetadata> metas) {
if (metas == null) {
return null;
}
final Item[] items = new Item[metas.size()];
int i = 0;
for (PathMetadata meta : metas) {
items[i++] = pathMetadataToItem(meta);
}
return items;
}
/**
* Converts a {@link Path} to a DynamoDB equality condition on that path as
* parent, suitable for querying all direct children of the path.
*
* @param path the path; can not be null
* @return DynamoDB equality condition on {@code path} as parent
*/
static KeyAttribute pathToParentKeyAttribute(Path path) {
return new KeyAttribute(PARENT, pathToParentKey(path));
}
/**
* e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
* @param path path to convert
* @return string for parent key
*/
static String pathToParentKey(Path path) {
Preconditions.checkNotNull(path);
Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
URI uri = path.toUri();
String bucket = uri.getHost();
Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
"Path missing bucket");
String pKey = "/" + bucket + uri.getPath();
// Strip trailing slash
if (pKey.endsWith("/")) {
pKey = pKey.substring(0, pKey.length() - 1);
}
return pKey;
}
/**
* Converts a {@link Path} to a DynamoDB key, suitable for getting the item
* matching the path.
*
* @param path the path; can not be null
* @return DynamoDB key for item matching {@code path}
*/
static PrimaryKey pathToKey(Path path) {
Preconditions.checkArgument(!path.isRoot(),
"Root path is not mapped to any PrimaryKey");
return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
path.getName());
}
/**
* Converts a collection of {@link Path} to a collection of DynamoDB keys.
*
* @see #pathToKey(Path)
*/
static PrimaryKey[] pathToKey(Collection<Path> paths) {
if (paths == null) {
return null;
}
final PrimaryKey[] keys = new PrimaryKey[paths.size()];
int i = 0;
for (Path p : paths) {
keys[i++] = pathToKey(p);
}
return keys;
}
/**
* There is no need to instantiate this class.
*/
private PathMetadataDynamoDBTranslation() {
}
}
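As a rough, package-local sketch (the translation methods are package-private), a round trip through the item format might look like the following; the path and username are invented, and the enclosing class is assumed to live in org.apache.hadoop.fs.s3a.s3guard with the same imports as above.
// Illustrative only: convert a file's metadata to a DynamoDB item and back.
static PathMetadata roundTripExample() throws IOException {
  FileStatus source = new FileStatus(10, false, 1, 512, 0,
      new Path("s3a://example-bucket/dir/file"));
  Item item = PathMetadataDynamoDBTranslation.pathMetadataToItem(
      new PathMetadata(source));
  // "hadoop" is an arbitrary username used when rebuilding the FileStatus.
  return PathMetadataDynamoDBTranslation.itemToPathMetadata(item, "hadoop");
}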

View File

@ -0,0 +1,463 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.util.ReflectionUtils;
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
/**
* Logic for integrating MetadataStore with S3A.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public final class S3Guard {
private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class);
@InterfaceAudience.Private
@InterfaceStability.Unstable
@VisibleForTesting
public static final String S3GUARD_DDB_CLIENT_FACTORY_IMPL =
"fs.s3a.s3guard.ddb.client.factory.impl";
static final Class<? extends DynamoDBClientFactory>
S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT =
DynamoDBClientFactory.DefaultDynamoDBClientFactory.class;
private static final FileStatus[] EMPTY_LISTING = new FileStatus[0];
// Utility class. All static functions.
private S3Guard() { }
/* Utility functions. */
/**
* Create a new instance of the configured MetadataStore.
* The returned MetadataStore will have been initialized via
* {@link MetadataStore#initialize(FileSystem)} by this function before
* returning it. Callers must clean up by calling
* {@link MetadataStore#close()} when done using the MetadataStore.
*
* @param fs FileSystem whose Configuration specifies which
* implementation to use.
* @return Reference to new MetadataStore.
* @throws IOException if the metadata store cannot be instantiated
*/
public static MetadataStore getMetadataStore(FileSystem fs)
throws IOException {
Preconditions.checkNotNull(fs);
Configuration conf = fs.getConf();
Preconditions.checkNotNull(conf);
MetadataStore msInstance;
try {
Class<? extends MetadataStore> msClass = getMetadataStoreClass(conf);
msInstance = ReflectionUtils.newInstance(msClass, conf);
LOG.debug("Using {} metadata store for {} filesystem",
msClass.getSimpleName(), fs.getScheme());
msInstance.initialize(fs);
return msInstance;
} catch (RuntimeException | IOException e) {
String message = "Failed to instantiate metadata store " +
conf.get(S3_METADATA_STORE_IMPL)
+ " defined in " + S3_METADATA_STORE_IMPL
+ ": " + e;
LOG.error(message, e);
if (e instanceof IOException) {
throw e;
} else {
throw new IOException(message, e);
}
}
}
private static Class<? extends MetadataStore> getMetadataStoreClass(
Configuration conf) {
if (conf == null) {
return NullMetadataStore.class;
}
return conf.getClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
MetadataStore.class);
}
/**
* Helper function which puts a given S3AFileStatus into the MetadataStore and
* returns the same S3AFileStatus. Instrumentation monitors the put operation.
* @param ms MetadataStore to {@code put()} into.
* @param status status to store
* @param instrumentation instrumentation of the s3a file system
* @return The same status as passed in
* @throws IOException if metadata store update failed
*/
public static S3AFileStatus putAndReturn(MetadataStore ms,
S3AFileStatus status,
S3AInstrumentation instrumentation) throws IOException {
long startTimeNano = System.nanoTime();
ms.put(new PathMetadata(status));
instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
(System.nanoTime() - startTimeNano));
instrumentation.incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
return status;
}
/**
* Convert the data of a directory listing to an array of {@link FileStatus}
* entries. Tombstones are filtered out at this point. If the listing is null
* an empty array is returned.
* @param dirMeta directory listing -may be null
* @return a possibly-empty array of file status entries
*/
public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
if (dirMeta == null) {
return EMPTY_LISTING;
}
Collection<PathMetadata> listing = dirMeta.getListing();
List<FileStatus> statuses = new ArrayList<>();
for (PathMetadata pm : listing) {
if (!pm.isDeleted()) {
statuses.add(pm.getFileStatus());
}
}
return statuses.toArray(new FileStatus[0]);
}
/**
* Given directory listing metadata from both the backing store and the
* MetadataStore, merge the two sources of truth to create a consistent
* view of the current directory contents, which can be returned to clients.
*
* Also update the MetadataStore to reflect the resulting directory listing.
*
* @param ms MetadataStore to use.
* @param path path to directory
* @param backingStatuses Directory listing from the backing store.
* @param dirMeta Directory listing from MetadataStore. May be null.
* @param isAuthoritative State of authoritative mode
* @return Final result of directory listing.
* @throws IOException if metadata store update failed
*/
public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
boolean isAuthoritative) throws IOException {
// Fast-path for NullMetadataStore
if (isNullMetadataStore(ms)) {
return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
}
assertQualified(path);
if (dirMeta == null) {
// The metadataStore had zero state for this directory
dirMeta = new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR,
false);
}
Set<Path> deleted = dirMeta.listTombstones();
// Since we treat the MetadataStore as a "fresher" or "consistent" view
// of metadata, we always use its metadata first.
// Since the authoritative case is already handled outside this function,
// we will basically start with the set of directory entries in the
// DirListingMetadata, and add any that only exist in the backingStatuses.
boolean changed = false;
for (FileStatus s : backingStatuses) {
if (deleted.contains(s.getPath())) {
continue;
}
// Minor race condition here. Multiple threads could add to this
// mutable DirListingMetadata. Since it is backed by a
// ConcurrentHashMap, the last put() wins.
// More concerning is two threads racing on listStatus() and delete().
// Any FileSystem has similar race conditions, but we could persist
// a stale entry longer. We could expose an atomic
// DirListingMetadata#putIfNotPresent()
boolean updated = dirMeta.put(s);
changed = changed || updated;
}
if (changed && isAuthoritative) {
dirMeta.setAuthoritative(true); // This is the full directory contents
ms.put(dirMeta);
}
return dirMetaToStatuses(dirMeta);
}
/**
* Although NullMetadataStore does nothing, callers may wish to avoid work
* (fast path) when the NullMetadataStore is in use.
* @param ms The MetadataStore to test
* @return true iff the MetadataStore is the null, or no-op, implementation.
*/
public static boolean isNullMetadataStore(MetadataStore ms) {
return (ms instanceof NullMetadataStore);
}
/**
* Update MetadataStore to reflect creation of the given directories.
*
* If an IOException is raised while trying to update the entry, this
* operation catches the exception and returns.
* @param ms MetadataStore to update.
* @param dirs null, or an ordered list of directories from leaf to root.
* E.g. if /a/ exists, and mkdirs(/a/b/c/d) is called, this
* list will contain [/a/b/c/d, /a/b/c, /a/b]. /a/b/c/d is
   *              an empty dir, and the other dirs only contain their child
* dir.
* @param owner Hadoop user name.
* @param authoritative Whether to mark new directories as authoritative.
*/
public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
String owner, boolean authoritative) {
if (dirs == null) {
return;
}
/* We discussed atomicity of this implementation.
* The concern is that multiple clients could race to write different
* cached directories to the MetadataStore. Two solutions are proposed:
* 1. Move mkdirs() into MetadataStore interface and let implementations
* ensure they are atomic.
* 2. Specify that the semantics of MetadataStore#putListStatus() is
   *    always additive. That is, if MetadataStore has listStatus() state
* for /a/b that contains [/a/b/file0, /a/b/file1], and we then call
* putListStatus(/a/b -> [/a/b/file2, /a/b/file3], isAuthoritative=true),
* then we will end up with final state of
* [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative =
* true
*/
FileStatus prevStatus = null;
// Use new batched put to reduce round trips.
List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
try {
// Iterate from leaf to root
for (int i = 0; i < dirs.size(); i++) {
boolean isLeaf = (prevStatus == null);
Path f = dirs.get(i);
assertQualified(f);
FileStatus status =
createUploadFileStatus(f, true, 0, 0, owner);
// We only need to put a DirListingMetadata if we are setting
// authoritative bit
DirListingMetadata dirMeta = null;
if (authoritative) {
Collection<PathMetadata> children;
if (isLeaf) {
children = DirListingMetadata.EMPTY_DIR;
} else {
children = new ArrayList<>(1);
children.add(new PathMetadata(prevStatus));
}
dirMeta = new DirListingMetadata(f, children, authoritative);
ms.put(dirMeta);
}
pathMetas.add(new PathMetadata(status));
prevStatus = status;
}
// Batched put
ms.put(pathMetas);
} catch (IOException ioe) {
LOG.error("MetadataStore#put() failure:", ioe);
}
}
/**
* Helper function that records the move of directory paths, adding
* resulting metadata to the supplied lists.
* Does not store in MetadataStore.
* @param ms MetadataStore, used to make this a no-op, when it is
* NullMetadataStore.
* @param srcPaths stores the source path here
* @param dstMetas stores destination metadata here
* @param srcPath source path to store
* @param dstPath destination path to store
* @param owner file owner to use in created records
*/
public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
String owner) {
if (isNullMetadataStore(ms)) {
return;
}
assertQualified(srcPath, dstPath);
FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
/**
* Like {@link #addMoveDir(MetadataStore, Collection, Collection, Path,
   * Path, String)}, but for files.
* @param ms MetadataStore, used to make this a no-op, when it is
* NullMetadataStore.
* @param srcPaths stores the source path here
* @param dstMetas stores destination metadata here
* @param srcPath source path to store
* @param dstPath destination path to store
* @param size length of file moved
* @param blockSize blocksize to associate with destination file
* @param owner file owner to use in created records
*/
public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
long size, long blockSize, String owner) {
if (isNullMetadataStore(ms)) {
return;
}
assertQualified(srcPath, dstPath);
FileStatus dstStatus = createUploadFileStatus(dstPath, false,
size, blockSize, owner);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
/**
* Helper method that records the move of all ancestors of a path.
*
* In S3A, an optimization is to delete unnecessary fake directory objects if
* the directory is non-empty. In that case, for a nested child to move, S3A
   * does not list, and thus does not move, all its ancestors (up to the source root). So we
* take care of those inferred directories of this path explicitly.
*
* As {@link #addMoveFile} and {@link #addMoveDir}, this method adds resulting
* metadata to the supplied lists. It does not store in MetadataStore.
*
* @param ms MetadataStore, no-op if it is NullMetadataStore
* @param srcPaths stores the source path here
* @param dstMetas stores destination metadata here
* @param srcRoot source root up to which (exclusive) should we add ancestors
* @param srcPath source path of the child to add ancestors
* @param dstPath destination path of the child to add ancestors
* @param owner Hadoop user name
*/
public static void addMoveAncestors(MetadataStore ms,
Collection<Path> srcPaths, Collection<PathMetadata> dstMetas,
Path srcRoot, Path srcPath, Path dstPath, String owner) {
if (isNullMetadataStore(ms)) {
return;
}
assertQualified(srcRoot, srcPath, dstPath);
if (srcPath.equals(srcRoot)) {
LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
return;
}
Path parentSrc = srcPath.getParent();
Path parentDst = dstPath.getParent();
while (parentSrc != null
&& !parentSrc.isRoot()
&& !parentSrc.equals(srcRoot)
&& !srcPaths.contains(parentSrc)) {
LOG.debug("Renaming non-listed parent {} to {}", parentSrc, parentDst);
S3Guard.addMoveDir(ms, srcPaths, dstMetas, parentSrc, parentDst, owner);
parentSrc = parentSrc.getParent();
parentDst = parentDst.getParent();
}
}
public static void addAncestors(MetadataStore metadataStore,
Path qualifiedPath, String username) throws IOException {
Collection<PathMetadata> newDirs = new ArrayList<>();
Path parent = qualifiedPath.getParent();
while (!parent.isRoot()) {
PathMetadata directory = metadataStore.get(parent);
if (directory == null || directory.isDeleted()) {
FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
null, parent);
PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
newDirs.add(meta);
} else {
break;
}
parent = parent.getParent();
}
metadataStore.put(newDirs);
}
private static void addMoveStatus(Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas,
Path srcPath,
FileStatus dstStatus) {
srcPaths.add(srcPath);
dstMetas.add(new PathMetadata(dstStatus));
}
/**
* Assert that the path is qualified with a host and scheme.
* @param p path to check
* @throws NullPointerException if either argument does not hold
*/
public static void assertQualified(Path p) {
URI uri = p.toUri();
// Paths must include bucket in case MetadataStore is shared between
// multiple S3AFileSystem instances
Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri);
// This should never fail, but is retained for completeness.
Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
}
/**
* Assert that all paths are valid.
* @param paths path to check
* @throws NullPointerException if either argument does not hold
*/
public static void assertQualified(Path...paths) {
for (Path path : paths) {
assertQualified(path);
}
}
}
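To make the integration pattern concrete, here is a hedged sketch of how a caller might use these helpers; it is not taken from S3AFileSystem, the filesystem variable, path and username are assumptions, and the fragment is assumed to run in a method that declares throws IOException.
// Illustrative only: bind to the store configured for an S3AFileSystem "fs",
// record the missing ancestor directories of a path, then release the store.
MetadataStore ms = S3Guard.getMetadataStore(fs);
try {
  Path qualified = fs.qualify(new Path("/tables/t1/part-0000"));
  S3Guard.addAncestors(ms, qualified, "hadoop");
} finally {
  ms.close();
}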

View File

@ -0,0 +1,924 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.shell.CommandFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import static org.apache.hadoop.fs.s3a.Constants.*;
/**
* CLI to manage S3Guard Metadata Store.
*/
public abstract class S3GuardTool extends Configured implements Tool {
private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
private static final String NAME = "s3guard";
private static final String COMMON_USAGE =
"When possible and not overridden by more specific options, metadata\n" +
"repository information will be inferred from the S3A URL (if provided)" +
"\n\n" +
"Generic options supported are:\n" +
" -conf <config file> - specify an application configuration file\n" +
" -D <property=value> - define a value for a given property\n";
private static final String USAGE = NAME +
" [command] [OPTIONS] [s3a://BUCKET]\n\n" +
"Commands: \n" +
"\t" + Init.NAME + " - " + Init.PURPOSE + "\n" +
"\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" +
"\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
"\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
"\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
private static final String DATA_IN_S3_IS_PRESERVED
= "(all data in S3 is preserved";
abstract public String getUsage();
// Exit codes
static final int SUCCESS = 0;
static final int INVALID_ARGUMENT = 1;
static final int ERROR = 99;
private S3AFileSystem filesystem;
private MetadataStore store;
private final CommandFormat commandFormat;
private static final String META_FLAG = "meta";
private static final String DAYS_FLAG = "days";
private static final String HOURS_FLAG = "hours";
private static final String MINUTES_FLAG = "minutes";
private static final String SECONDS_FLAG = "seconds";
private static final String REGION_FLAG = "region";
private static final String READ_FLAG = "read";
private static final String WRITE_FLAG = "write";
/**
 * Construct an S3Guard tool with HDFS configuration.
* @param conf Configuration.
*/
protected S3GuardTool(Configuration conf) {
super(conf);
commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
// For metadata store URI
commandFormat.addOptionWithValue(META_FLAG);
// DDB region.
commandFormat.addOptionWithValue(REGION_FLAG);
}
/**
* Return sub-command name.
*/
abstract String getName();
/**
 * Parse DynamoDB region from either the -region option or an S3 path.
*
* This function should only be called from {@link Init} or
* {@link Destroy}.
*
* @param paths remaining parameters from CLI.
* @return false for invalid parameters.
* @throws IOException on I/O errors.
*/
boolean parseDynamoDBRegion(List<String> paths) throws IOException {
Configuration conf = getConf();
String fromCli = getCommandFormat().getOptValue(REGION_FLAG);
String fromConf = conf.get(S3GUARD_DDB_REGION_KEY);
boolean hasS3Path = !paths.isEmpty();
if (fromCli != null) {
if (fromCli.isEmpty()) {
System.err.println("No region provided with -" + REGION_FLAG + " flag");
return false;
}
if (hasS3Path) {
System.err.println("Providing both an S3 path and the -" + REGION_FLAG
+ " flag is not supported. If you need to specify a different "
+ "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY);
return false;
}
conf.set(S3GUARD_DDB_REGION_KEY, fromCli);
return true;
}
if (fromConf != null) {
if (fromConf.isEmpty()) {
System.err.printf("No region provided with config %s, %n",
S3GUARD_DDB_REGION_KEY);
return false;
}
return true;
}
if (hasS3Path) {
String s3Path = paths.get(0);
initS3AFileSystem(s3Path);
return true;
}
System.err.println("No region found from -" + REGION_FLAG + " flag, " +
"config, or S3 bucket");
return false;
}
/**
* Parse metadata store from command line option or HDFS configuration.
*
* @param forceCreate override the auto-creation setting to true.
 * @return an initialized metadata store.
*/
MetadataStore initMetadataStore(boolean forceCreate) throws IOException {
if (getStore() != null) {
return getStore();
}
Configuration conf;
if (filesystem == null) {
conf = getConf();
} else {
conf = filesystem.getConf();
}
String metaURI = getCommandFormat().getOptValue(META_FLAG);
if (metaURI != null && !metaURI.isEmpty()) {
URI uri = URI.create(metaURI);
LOG.info("create metadata store: {}", uri + " scheme: "
+ uri.getScheme());
switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) {
case "local":
setStore(new LocalMetadataStore());
break;
case "dynamodb":
setStore(new DynamoDBMetadataStore());
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
if (forceCreate) {
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
}
break;
default:
throw new IOException(
String.format("Metadata store %s is not supported", uri));
}
} else {
// CLI does not specify metadata store URI, it uses default metadata store
// DynamoDB instead.
setStore(new DynamoDBMetadataStore());
if (forceCreate) {
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
}
}
if (filesystem == null) {
getStore().initialize(conf);
} else {
getStore().initialize(filesystem);
}
LOG.info("Metadata store {} is initialized.", getStore());
return getStore();
}
/**
* Initialize S3A FileSystem instance.
*
* @param path s3a URI
* @throws IOException
*/
void initS3AFileSystem(String path) throws IOException {
URI uri;
try {
uri = new URI(path);
} catch (URISyntaxException e) {
throw new IOException(e);
}
// Make sure that S3AFileSystem does not hold an actual MetadataStore
// implementation.
Configuration conf = getConf();
conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
MetadataStore.class);
FileSystem fs = FileSystem.get(uri, getConf());
if (!(fs instanceof S3AFileSystem)) {
throw new IOException(
String.format("URI %s is not a S3A file system: %s", uri,
fs.getClass().getName()));
}
filesystem = (S3AFileSystem) fs;
}
/**
 * Parse CLI arguments and return the positional arguments.
* The options are stored in {@link #commandFormat}
*
* @param args command line arguments.
 * @return the positional arguments from CLI.
*/
List<String> parseArgs(String[] args) {
return getCommandFormat().parse(args, 1);
}
protected S3AFileSystem getFilesystem() {
return filesystem;
}
protected void setFilesystem(S3AFileSystem filesystem) {
this.filesystem = filesystem;
}
@VisibleForTesting
public MetadataStore getStore() {
return store;
}
@VisibleForTesting
protected void setStore(MetadataStore store) {
Preconditions.checkNotNull(store);
this.store = store;
}
protected CommandFormat getCommandFormat() {
return commandFormat;
}
/**
* Create the metadata store.
*/
static class Init extends S3GuardTool {
private static final String NAME = "init";
public static final String PURPOSE = "initialize metadata repository";
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
"Amazon DynamoDB-specific options:\n" +
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
" -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
" -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
"\n" +
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
Init(Configuration conf) {
super(conf);
// read capacity.
getCommandFormat().addOptionWithValue(READ_FLAG);
// write capacity.
getCommandFormat().addOptionWithValue(WRITE_FLAG);
}
@Override
String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
@Override
public int run(String[] args) throws IOException {
List<String> paths = parseArgs(args);
String readCap = getCommandFormat().getOptValue(READ_FLAG);
if (readCap != null && !readCap.isEmpty()) {
int readCapacity = Integer.parseInt(readCap);
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
}
String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
if (writeCap != null && !writeCap.isEmpty()) {
int writeCapacity = Integer.parseInt(writeCap);
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
}
// Validate parameters.
if (!parseDynamoDBRegion(paths)) {
System.err.println(USAGE);
return INVALID_ARGUMENT;
}
initMetadataStore(true);
return SUCCESS;
}
}
/**
* Destroy a metadata store.
*/
static class Destroy extends S3GuardTool {
private static final String NAME = "destroy";
public static final String PURPOSE = "destroy Metadata Store data "
+ DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
"Amazon DynamoDB-specific options:\n" +
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
"\n" +
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
Destroy(Configuration conf) {
super(conf);
}
@Override
String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
public int run(String[] args) throws IOException {
List<String> paths = parseArgs(args);
if (!parseDynamoDBRegion(paths)) {
System.err.println(USAGE);
return INVALID_ARGUMENT;
}
try {
initMetadataStore(false);
} catch (FileNotFoundException e) {
// indication that the table was not found
LOG.debug("Failed to bind to store to be destroyed", e);
LOG.info("Metadata Store does not exist.");
return SUCCESS;
}
Preconditions.checkState(getStore() != null,
"Metadata Store is not initialized");
getStore().destroy();
LOG.info("Metadata store is deleted.");
return SUCCESS;
}
}
/**
* Import s3 metadata to the metadata store.
*/
static class Import extends S3GuardTool {
private static final String NAME = "import";
public static final String PURPOSE = "import metadata from existing S3 " +
"data";
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
"Amazon DynamoDB-specific options:\n" +
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
"\n" +
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
private final Set<Path> dirCache = new HashSet<>();
Import(Configuration conf) {
super(conf);
}
@Override
String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
/**
 * Put parents into MS and cache if the parents are not present.
*
* @param f the file or an empty directory.
* @throws IOException on I/O errors.
*/
private void putParentsIfNotPresent(FileStatus f) throws IOException {
Preconditions.checkNotNull(f);
Path parent = f.getPath().getParent();
while (parent != null) {
if (dirCache.contains(parent)) {
return;
}
FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
f.getOwner());
getStore().put(new PathMetadata(dir));
dirCache.add(parent);
parent = parent.getParent();
}
}
/**
* Recursively import every path under path.
* @return number of items inserted into MetadataStore
* @throws IOException on I/O errors.
*/
private long importDir(FileStatus status) throws IOException {
Preconditions.checkArgument(status.isDirectory());
RemoteIterator<LocatedFileStatus> it = getFilesystem()
.listFilesAndEmptyDirectories(status.getPath(), true);
long items = 0;
while (it.hasNext()) {
LocatedFileStatus located = it.next();
FileStatus child;
if (located.isDirectory()) {
child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
located.getOwner());
dirCache.add(child.getPath());
} else {
child = new S3AFileStatus(located.getLen(),
located.getModificationTime(),
located.getPath(),
located.getBlockSize(),
located.getOwner());
}
putParentsIfNotPresent(child);
getStore().put(new PathMetadata(child));
items++;
}
return items;
}
@Override
public int run(String[] args) throws IOException {
List<String> paths = parseArgs(args);
if (paths.isEmpty()) {
System.err.println(getUsage());
return INVALID_ARGUMENT;
}
String s3Path = paths.get(0);
initS3AFileSystem(s3Path);
URI uri;
try {
uri = new URI(s3Path);
} catch (URISyntaxException e) {
throw new IOException(e);
}
String filePath = uri.getPath();
if (filePath.isEmpty()) {
// If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider
// root to be the path
filePath = "/";
}
Path path = new Path(filePath);
FileStatus status = getFilesystem().getFileStatus(path);
initMetadataStore(false);
long items = 1;
if (status.isFile()) {
PathMetadata meta = new PathMetadata(status);
getStore().put(meta);
} else {
items = importDir(status);
}
System.out.printf("Inserted %d items into Metadata Store%n", items);
return SUCCESS;
}
}
/**
* Show diffs between the s3 and metadata store.
*/
static class Diff extends S3GuardTool {
private static final String NAME = "diff";
public static final String PURPOSE = "report on delta between S3 and " +
"repository";
private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
"Amazon DynamoDB-specific options:\n" +
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
"\n" +
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
private static final String SEP = "\t";
static final String S3_PREFIX = "S3";
static final String MS_PREFIX = "MS";
Diff(Configuration conf) {
super(conf);
}
@Override
String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
/**
 * Formats the output of printing a FileStatus in the S3Guard diff tool.
* @param status the status to print.
* @return the string of output.
*/
private static String formatFileStatus(FileStatus status) {
return String.format("%s%s%d%s%s",
status.isDirectory() ? "D" : "F",
SEP,
status.getLen(),
SEP,
status.getPath().toString());
}
/**
* Compares metadata from 2 S3 FileStatus's to see if they differ.
* @param thisOne
* @param thatOne
* @return true if the metadata is not identical
*/
private static boolean differ(FileStatus thisOne, FileStatus thatOne) {
Preconditions.checkArgument(!(thisOne == null && thatOne == null));
return (thisOne == null || thatOne == null) ||
(thisOne.getLen() != thatOne.getLen()) ||
(thisOne.isDirectory() != thatOne.isDirectory()) ||
(!thisOne.isDirectory() &&
thisOne.getModificationTime() != thatOne.getModificationTime());
}
/**
* Print difference, if any, between two file statuses to the output stream.
*
* @param msStatus file status from metadata store.
* @param s3Status file status from S3.
* @param out output stream.
*/
private static void printDiff(FileStatus msStatus,
FileStatus s3Status,
PrintStream out) {
Preconditions.checkArgument(!(msStatus == null && s3Status == null));
if (msStatus != null && s3Status != null) {
Preconditions.checkArgument(
msStatus.getPath().equals(s3Status.getPath()),
String.format("The path from metadata store and s3 are different:" +
" ms=%s s3=%s", msStatus.getPath(), s3Status.getPath()));
}
if (differ(msStatus, s3Status)) {
if (s3Status != null) {
out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status));
}
if (msStatus != null) {
out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus));
}
}
}
/**
* Compare the metadata of the directory with the same path, on S3 and
* the metadata store, respectively. If one of them is null, consider the
 * metadata of the directory and all its subdirectories to be missing from
* the source.
*
* Pass the FileStatus obtained from s3 and metadata store to avoid one
* round trip to fetch the same metadata twice, because the FileStatus
 * have already been obtained from listStatus() / listChildren operations.
*
* @param msDir the directory FileStatus obtained from the metadata store.
* @param s3Dir the directory FileStatus obtained from S3.
* @param out the output stream to generate diff results.
* @throws IOException on I/O errors.
*/
private void compareDir(FileStatus msDir, FileStatus s3Dir,
PrintStream out) throws IOException {
Preconditions.checkArgument(!(msDir == null && s3Dir == null));
if (msDir != null && s3Dir != null) {
Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
String.format("The path from metadata store and s3 are different:" +
" ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
}
Map<Path, FileStatus> s3Children = new HashMap<>();
if (s3Dir != null && s3Dir.isDirectory()) {
for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) {
s3Children.put(status.getPath(), status);
}
}
Map<Path, FileStatus> msChildren = new HashMap<>();
if (msDir != null && msDir.isDirectory()) {
DirListingMetadata dirMeta =
getStore().listChildren(msDir.getPath());
if (dirMeta != null) {
for (PathMetadata meta : dirMeta.getListing()) {
FileStatus status = meta.getFileStatus();
msChildren.put(status.getPath(), status);
}
}
}
Set<Path> allPaths = new HashSet<>(s3Children.keySet());
allPaths.addAll(msChildren.keySet());
for (Path path : allPaths) {
FileStatus s3Status = s3Children.get(path);
FileStatus msStatus = msChildren.get(path);
printDiff(msStatus, s3Status, out);
if ((s3Status != null && s3Status.isDirectory()) ||
(msStatus != null && msStatus.isDirectory())) {
compareDir(msStatus, s3Status, out);
}
}
out.flush();
}
/**
* Compare both metadata store and S3 on the same path.
*
* @param path the path to be compared.
* @param out the output stream to display results.
* @throws IOException on I/O errors.
*/
private void compareRoot(Path path, PrintStream out) throws IOException {
Path qualified = getFilesystem().qualify(path);
FileStatus s3Status = null;
try {
s3Status = getFilesystem().getFileStatus(qualified);
} catch (FileNotFoundException e) {
}
PathMetadata meta = getStore().get(qualified);
FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
meta.getFileStatus() : null;
compareDir(msStatus, s3Status, out);
}
@VisibleForTesting
public int run(String[] args, PrintStream out) throws IOException {
List<String> paths = parseArgs(args);
if (paths.isEmpty()) {
out.println(USAGE);
return INVALID_ARGUMENT;
}
String s3Path = paths.get(0);
initS3AFileSystem(s3Path);
initMetadataStore(true);
URI uri;
try {
uri = new URI(s3Path);
} catch (URISyntaxException e) {
throw new IOException(e);
}
Path root;
if (uri.getPath().isEmpty()) {
root = new Path("/");
} else {
root = new Path(uri.getPath());
}
root = getFilesystem().qualify(root);
compareRoot(root, out);
out.flush();
return SUCCESS;
}
@Override
public int run(String[] args) throws IOException {
return run(args, System.out);
}
}
/**
* Prune metadata that has not been modified recently.
*/
static class Prune extends S3GuardTool {
private static final String NAME = "prune";
public static final String PURPOSE = "truncate older metadata from " +
"repository "
+ DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" +
"Common options:\n" +
" -" + META_FLAG + " URL - Metadata repository details " +
"(implementation-specific)\n" +
"\n" +
"Amazon DynamoDB-specific options:\n" +
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
"\n" +
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
" is not supported.";
Prune(Configuration conf) {
super(conf);
CommandFormat format = getCommandFormat();
format.addOptionWithValue(DAYS_FLAG);
format.addOptionWithValue(HOURS_FLAG);
format.addOptionWithValue(MINUTES_FLAG);
format.addOptionWithValue(SECONDS_FLAG);
}
@VisibleForTesting
void setMetadataStore(MetadataStore ms) {
Preconditions.checkNotNull(ms);
this.setStore(ms);
}
@Override
String getName() {
return NAME;
}
@Override
public String getUsage() {
return USAGE;
}
private long getDeltaComponent(TimeUnit unit, String arg) {
String raw = getCommandFormat().getOptValue(arg);
if (raw == null || raw.isEmpty()) {
return 0;
}
Long parsed = Long.parseLong(raw);
return unit.toMillis(parsed);
}
@VisibleForTesting
public int run(String[] args, PrintStream out) throws
InterruptedException, IOException {
List<String> paths = parseArgs(args);
if (!parseDynamoDBRegion(paths)) {
System.err.println(USAGE);
return INVALID_ARGUMENT;
}
initMetadataStore(false);
Configuration conf = getConf();
long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
long cliDelta = 0;
cliDelta += getDeltaComponent(TimeUnit.DAYS, "days");
cliDelta += getDeltaComponent(TimeUnit.HOURS, "hours");
cliDelta += getDeltaComponent(TimeUnit.MINUTES, "minutes");
cliDelta += getDeltaComponent(TimeUnit.SECONDS, "seconds");
if (confDelta <= 0 && cliDelta <= 0) {
System.err.println(
"You must specify a positive age for metadata to prune.");
return INVALID_ARGUMENT;
}
// A delta provided on the CLI overrides if one is configured
long delta = confDelta;
if (cliDelta > 0) {
delta = cliDelta;
}
long now = System.currentTimeMillis();
long divide = now - delta;
getStore().prune(divide);
out.flush();
return SUCCESS;
}
@Override
public int run(String[] args) throws InterruptedException, IOException {
return run(args, System.out);
}
}
private static S3GuardTool command;
private static void printHelp() {
if (command == null) {
System.err.println("Usage: hadoop " + USAGE);
System.err.println("\tperform S3Guard metadata store " +
"administrative commands.");
} else {
System.err.println("Usage: hadoop " + command.getUsage());
}
System.err.println();
System.err.println(COMMON_USAGE);
}
/**
* Execute the command with the given arguments.
*
* @param args command specific arguments.
* @param conf Hadoop configuration.
* @return exit code.
* @throws Exception on I/O errors.
*/
public static int run(String[] args, Configuration conf) throws
Exception {
/* ToolRunner.run does this too, but we must do it before looking at
subCommand or instantiating the cmd object below */
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length == 0) {
printHelp();
return INVALID_ARGUMENT;
}
final String subCommand = otherArgs[0];
switch (subCommand) {
case Init.NAME:
command = new Init(conf);
break;
case Destroy.NAME:
command = new Destroy(conf);
break;
case Import.NAME:
command = new Import(conf);
break;
case Diff.NAME:
command = new Diff(conf);
break;
case Prune.NAME:
command = new Prune(conf);
break;
default:
printHelp();
return INVALID_ARGUMENT;
}
return ToolRunner.run(conf, command, otherArgs);
}
/**
* Main entry point. Calls {@code System.exit()} on all execution paths.
* @param args argument list
*/
public static void main(String[] args) {
try {
int ret = run(args, new Configuration());
System.exit(ret);
} catch (CommandFormat.UnknownOptionException e) {
System.err.println(e.getMessage());
printHelp();
System.exit(INVALID_ARGUMENT);
} catch (Throwable e) {
e.printStackTrace(System.err);
System.exit(ERROR);
}
}
}
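For completeness, a hedged example of driving the tool programmatically follows; the table name and region are invented, the fragment assumes a surrounding method that declares throws Exception, and the same argument vector is what the `hadoop s3guard` shell subcommand forwards.
// Illustrative only: equivalent to
//   hadoop s3guard init -meta dynamodb://example-table -region us-west-2
Configuration conf = new Configuration();
int exitCode = S3GuardTool.run(
    new String[] {"init", "-meta", "dynamodb://example-table",
        "-region", "us-west-2"},
    conf);
System.exit(exitCode);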

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This package contains classes related to S3Guard: a feature of S3A to mask
* the eventual consistency behavior of S3 and optimize access patterns by
* coordinating with a strongly consistent external store for file system
* metadata.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -105,6 +105,10 @@ public final class S3xLoginHelper {
* @return a login tuple, possibly empty.
*/
public static Login extractLoginDetails(URI name) {
if (name == null) {
return Login.EMPTY;
}
try {
String authority = name.getAuthority();
if (authority == null) {

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if ! declare -f hadoop_subcommand_s3guard >/dev/null 2>/dev/null; then
if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
hadoop_add_subcommand "s3guard" client "manage metadata on S3"
fi
# this can't be indented otherwise shelldocs won't get it
## @description s3guard command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_s3guard
{
# shellcheck disable=SC2034
HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool
hadoop_add_to_classpath_tools hadoop-aws
}
fi

View File

@ -46,6 +46,7 @@ See also:
* [Testing](testing.html)
* [Troubleshooting S3a](troubleshooting_s3a.html)
* [S3Guard](s3guard.html)
### Warning #1: Object Stores are not filesystems
@ -1552,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
SSE-KMS is where the user specifies a Customer Master Key (CMK) that is used to
encrypt the objects. The user may specify a specific CMK or leave the
`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
used in an S3 bucket in a different region. There can also be policies
assigned to the CMK that prohibit or restrict its use for users causing S3A

View File

@ -0,0 +1,610 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
# S3Guard: Consistency and Metadata Caching for S3A
**Experimental Feature**
<!-- MACRO{toc|fromDepth=0|toDepth=5} -->
## Overview
*S3Guard* is an experimental feature for the S3A client of the S3 object store,
which can use a (consistent) database as the store of metadata about objects
in an S3 bucket.
S3Guard
1. May improve performance on directory listing/scanning operations,
including those which take place during the partitioning period of query
execution, the process where files are listed and the work divided up amongst
processes.
1. Permits a consistent view of the object store. Without this, changes in
objects may not be immediately visible, especially in listing operations.
1. Offers a platform for future performance improvements for running Hadoop
workloads on top of object stores.
The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
reads or modifies metadata, a shadow copy of that metadata is stored in a
separate MetadataStore implementation. Each MetadataStore implementation
offers HDFS-like consistency for the metadata, and may also provide faster
lookups for things like file status or directory listings.
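The following sketch is purely illustrative of that pattern rather than the actual S3A code; the names `fs`, `newStatus` and `parent` are placeholders.
```java
// Not the real S3A implementation; a rough sketch of the shadow-metadata idea.
MetadataStore ms = S3Guard.getMetadataStore(fs);        // fs: an S3AFileSystem
ms.put(new PathMetadata(newStatus));                    // record a write in the store
DirListingMetadata listing = ms.listChildren(parent);   // consistent listing data
```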
For links to early design documents and related patches, see
[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
*Important*
* S3Guard is experimental and should be considered unstable.
* While all underlying data is persisted in S3, if, for some reason,
the S3Guard-cached metadata becomes inconsistent with that in S3,
queries on the data may become incorrect.
For example, new datasets may be omitted, objects may be overwritten,
or clients may not be aware that some data has been deleted.
It is essential for all clients writing to an S3Guard-enabled
S3 Repository to use the feature. Clients reading the data may work directly
with the S3A data, in which case the normal S3 consistency guarantees apply.
## Setting up S3Guard
The latest configuration parameters are defined in `core-default.xml`. You
should consult that file for full information, but a summary is provided here.
### 1. Choose the Database
A core concept of S3Guard is that the directory listing data of the object
store, *the metadata*, is replicated in a higher-performance, consistent
database. In S3Guard, this database is called *the Metadata Store*.
By default, S3Guard is not enabled.
The Metadata Store to use in production is bonded to Amazon's DynamoDB
database service. The following setting will enable this Metadata Store:
```xml
<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
</property>
```
Note that the `NullMetadataStore` store can be explicitly requested if desired.
This offers no metadata storage, and effectively disables S3Guard.
```xml
<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
</property>
```
### 2. Configure S3Guard Settings
More settings may be added in the future.
Currently the only Metadata Store-independent setting, besides the
implementation class above, is the *allow authoritative* flag.
It is recommended that you leave the default setting here:
```xml
<property>
<name>fs.s3a.metadatastore.authoritative</name>
<value>false</value>
</property>
```
Setting this to `true` is currently an experimental feature. When true, the
S3A client will avoid round-trips to S3 when getting directory listings, if
there is a fully-cached version of the directory stored in the Metadata Store.
Note that if this is set to true, it may exacerbate or persist existing race
conditions around multiple concurrent modifications and listings of a given
directory tree.
In particular: **If the Metadata Store is declared as authoritative,
all interactions with the S3 bucket(s) must be through S3A clients sharing
the same Metadata Store**
### 3. Configure the Metadata Store.
Here are the `DynamoDBMetadataStore` settings. Other Metadata Store
implementations will have their own configuration parameters.
### 4. Name Your Table
First, choose the name of the table you wish to use for the S3Guard metadata
storage in your DynamoDB instance. If you leave it unset/empty, a
separate table will be created for each S3 bucket you access, and that
bucket's name will be used for the name of the DynamoDB table. For example,
this sets the table name to `my-ddb-table-name`
```xml
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value>my-ddb-table-name</value>
<description>
The DynamoDB table name to operate on. Without this property, the respective
S3 bucket names will be used.
</description>
</property>
```
There are several reasons why sharing a table across multiple buckets is a good idea.
1. You are billed for the I/O capacity allocated to the table,
*even when the table is not used*. Sharing capacity can reduce costs.
1. You can share the "provision burden" across the buckets. That is, rather
than allocating for the peak load on a single bucket, you can allocate for
the peak load *across all the buckets*, which is likely to be significantly
lower.
1. It's easier to measure and tune the load requirements and cost of
S3Guard, because there is only one table to review and configure in the
AWS management console.
When wouldn't you want to share a table?
1. When you do explicitly want to provision I/O capacity to a specific bucket
and table, isolated from others.
1. When you are using separate billing for specific buckets allocated
to specific projects.
1. When different users/roles have different access rights to different buckets.
As S3Guard requires all users to have R/W access to the table, all users will
be able to list the metadata in all buckets, even those to which they lack
read access.
### 5. Locate your Table
You may also wish to specify the region to use for DynamoDB. If a region
is not configured, S3A will assume that it is in the same region as the S3
bucket. A list of regions for the DynamoDB service can be found in
[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
In this example, to use the US West 2 region:
```xml
<property>
<name>fs.s3a.s3guard.ddb.region</name>
<value>us-west-2</value>
</property>
```
When working with S3Guard-managed buckets from EC2 VMs running in AWS
infrastructure, using a local DynamoDB region ensures the lowest latency
and highest reliability, as well as avoiding all long-haul network charges.
The S3Guard tables, and indeed, the S3 buckets, should all be in the same
region as the VMs.
### 6. Optional: Create your Table
Next, you can choose whether or not the table will be automatically created
(if it doesn't already exist). If you want this feature, set the
`fs.s3a.s3guard.ddb.table.create` option to `true`.
```xml
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>true</value>
<description>
If true, the S3A client will create the table if it does not already exist.
</description>
</property>
```
### 7. If creating a table: Set your DynamoDB IO Capacity
Next, you need to set the DynamoDB read and write throughput requirements you
expect to need for your cluster. Setting higher values will cost you more
money. *Note* that these settings only affect table creation when
`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
an existing table, use the AWS console or CLI tool.
For more details on DynamoDB capacity units, see the AWS page on [Capacity
Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
The charges are incurred per hour for the life of the table, *even when the
table and the underlying S3 buckets are not being used*.
There are also charges incurred for data storage and for data IO outside of the
region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path names
and summary details of objects —the actual data is stored in S3, so billed at S3
rates.
```xml
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
<value>500</value>
<description>
Provisioned throughput requirements for read operations in terms of capacity
units for the DynamoDB table. This config value will only be used when
creating a new DynamoDB table, though later you can manually provision by
increasing or decreasing read capacity as needed for existing tables.
See DynamoDB documents for more information.
</description>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
<value>100</value>
<description>
Provisioned throughput requirements for write operations in terms of
capacity units for the DynamoDB table. Refer to related config
fs.s3a.s3guard.ddb.table.capacity.read before usage.
</description>
</property>
```
Attempting to perform more IO than the capacity requested simply throttles the
IO; small capacity numbers are recommended when initially experimenting
with S3Guard.
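As a starting point for experimentation, here is a sketch which combines the
table-creation settings from the sections above with deliberately small
capacity values. The table name and the capacity numbers are illustrative
only, not recommendations:
```xml
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value>my-ddb-table-name</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
<value>10</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
<value>5</value>
</property>
```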
## Authenticating with S3Guard
The DynamoDB metadata store takes advantage of the fact that the DynamoDB
service uses the same authentication mechanisms as S3. S3Guard
gets all its credentials from the S3A client that is using it.
All existing S3 authentication mechanisms can be used, with one exception:
credentials placed in URIs are not supported for S3Guard, for security
reasons.
## Per-bucket S3Guard configuration
In production, it is likely only some buckets will have S3Guard enabled;
those which are read-only may have it disabled, for example. Equally importantly,
buckets in different regions should have different tables, each
in the relevant region.
These options can be managed through S3A's [per-bucket configuration
mechanism](./index.html#Configuring_different_S3_buckets).
All options set under `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
to the options `fs.s3a.KEY` *for that bucket only*.
As an example, here is a configuration to use different metadata stores
and tables for different buckets
First, we define shortcuts for the metadata store classnames
```xml
<property>
<name>s3guard.null</name>
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
</property>
<property>
<name>s3guard.dynamo</name>
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
</property>
```
Next, Amazon's public landsat database is configured with no
metadata store
```xml
<property>
<name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
<value>${s3guard.null}</value>
<description>The read-only landsat-pds repository isn't
managed by S3Guard</description>
</property>
```
Next the `ireland-2` and `ireland-offline` buckets are configured with
DynamoDB as the store, and a shared table `production-table`
```xml
<property>
<name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
<value>${s3guard.dynamo}</value>
</property>
<property>
<name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
<value>${s3guard.dynamo}</value>
</property>
<property>
<name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
<value>production-table</value>
</property>
```
The region of this table is automatically set to be that of the buckets,
here `eu-west-1`; the same table name may actually be used in different
regions.
Together then, this configuration enables the DynamoDB Metadata Store
for two buckets with a shared table, while disabling it for the public
bucket.
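The per-bucket mechanism should apply equally to the other S3Guard options.
As a sketch only (the authoritative flag is itself experimental, so this is an
illustration of the mechanism rather than a recommended setting), a bucket
whose contents are written solely by S3A clients sharing this Metadata Store
could enable authoritative mode just for itself:
```xml
<property>
<name>fs.s3a.bucket.ireland-2.metadatastore.authoritative</name>
<value>true</value>
</property>
```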
## S3Guard Command Line Interface (CLI)
Note that in some cases an AWS region or `s3a://` URI can be provided.
Metadata store URIs include a scheme that designates the backing store,
for example `dynamodb://table_name`. As documented above, the
AWS region can be inferred if the URI to an existing bucket is provided.
The S3A URI must also be provided for per-bucket configuration options
to be picked up. That is: when an s3a URL is provided on the command line,
all its "resolved" per-bucket settings are used to connect to, authenticate
with and configure the S3Guard table. If no such URL is provided, then
the base settings are picked up.
### Create a table: `s3guard init`
```bash
hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
```
Creates and initializes an empty metadata store.
A DynamoDB metadata store can be initialized with additional parameters
pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
```bash
[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
```
Example 1
```bash
hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
```
Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
in the same location as the bucket "ireland-1".
Example 2
```bash
hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
```
Creates a table "ireland-team" in the same region "s3-eu-west-1.amazonaws.com"
### Import a bucket: `s3guard import`
```bash
hadoop s3guard import [-meta URI] s3a://BUCKET
```
Pre-populates a metadata store according to the current contents of an S3
bucket. If the `-meta` option is omitted, the binding information is taken
from the `core-site.xml` configuration.
Example
```bash
hadoop s3guard import s3a://ireland-1
```
### Audit a table: `s3guard diff`
```bash
hadoop s3guard diff [-meta URI] s3a://BUCKET
```
Lists discrepancies between a metadata store and bucket. Note that depending on
how S3Guard is used, certain discrepancies are to be expected.
Example
```bash
hadoop s3guard diff s3a://ireland-1
```
### Delete a table: `s3guard destroy`
Deletes a metadata store. With DynamoDB as the store, this means
the specific DynamoDB table used to store the metadata.
```bash
hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
```
This *does not* delete the bucket, only the S3Guard table which it is bound
to.
Examples
```bash
hadoop s3guard destroy s3a://ireland-1
```
Deletes the table which the bucket ireland-1 is configured to use
as its MetadataStore.
```bash
hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
```
### Clean up a table: `s3guard prune`
Delete all file entries in the MetadataStore table whose object "modification
time" is older than the specified age.
```bash
hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
[-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
```
A time value must be supplied.
1. This does not delete the entries in the bucket itself.
1. The modification time is effectively the creation time of the objects
in the S3 Bucket.
1. Even when an S3A URI is supplied, all entries in the table older than
a specific age are deleted &mdash; even those from other buckets.
Example
```bash
hadoop s3guard prune -days 7 s3a://ireland-1
```
Deletes all entries in the S3Guard table for files older than seven days from
the table associated with `s3a://ireland-1`.
```bash
hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
```
Deletes all entries more than 90 minutes old from the table "ireland-team" in
the region "eu-west-1".
## Debugging and Error Handling
If you run into network connectivity issues, or have a machine failure in the
middle of an operation, you may end up with your metadata store having state
that differs from S3. The S3Guard CLI commands, covered in the CLI section
above, can be used to diagnose and repair these issues.
There are some logs whose log level can be increased to provide more
information.
```properties
# Log S3Guard classes
log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
# Log all S3A classes
log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
# Enable debug logging of AWS DynamoDB client
log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
# Log all HTTP requests made; includes S3 interaction. This may
# include sensitive information such as account IDs in HTTP headers.
log4j.logger.com.amazonaws.request=DEBUG
```
If all else fails, S3Guard is designed to allow for easy recovery by deleting
the metadata store data. In DynamoDB, this can be accomplished by simply
deleting the table, and allowing S3Guard to recreate it from scratch. Note
that S3Guard tracks recent changes to file metadata to implement consistency.
Deleting the metadata store table will simply result in a period of eventual
consistency for any file modifications that were made right before the table
was deleted.
### Failure Semantics
Operations which modify metadata will make changes to S3 first. If, and only
if, those operations succeed, the equivalent changes will be made to the
Metadata Store.
These changes to S3 and Metadata Store are not fully-transactional: If the S3
operations succeed, and the subsequent Metadata Store updates fail, the S3
changes will *not* be rolled back. In this case, an error message will be
logged.
### Versioning
S3Guard tables are created with a version marker, an entry with the primary
key and child entry of `../VERSION`; the use of a relative path guarantees
that it will not be resolved.
#### Versioning policy.
1. The version number of an S3Guard table will only be incremented when
an incompatible change is made to the table structure —that is, the structure
has changed so that it is no longer readable by older versions, or because
it has added new mandatory fields which older versions do not create.
1. The version number of S3Guard tables will only be changed by incrementing
the value.
1. Updated versions of S3Guard MAY continue to support older version tables.
1. If an incompatible change is made such that existing tables are not compatible,
then a means shall be provided to update existing tables. For example:
an option in the Command Line Interface, or an option to upgrade tables
during S3Guard initialization.
*Note*: this policy does not indicate any intent to upgrade table structures
in an incompatible manner. The version marker in tables exists to support
such an option if it ever becomes necessary, by ensuring that all S3Guard
clients can recognise any version mismatch.
### Security
All users of the DynamoDB table must have write access to it. This
effectively means they must have write access to the entire object store.
There's not been much testing of using an S3Guard Metadata Store
with a read-only S3 bucket. It *should* work, provided all users
have write access to the DynamoDB table. And, as updates to the Metadata Store
are only made after successful file creation, deletion and rename, the
store is *unlikely* to get out of sync; even so, it is still something which
merits more testing before it could be considered reliable.
### Troubleshooting
#### Error: `S3Guard table lacks version marker.`
The table which was intended to be used as an S3Guard metadata store
does not have any version marker indicating that it is an S3Guard table.
It may be that this is not an S3Guard table.
* Make sure that this is the correct table name.
* Delete the table, so it can be rebuilt.
#### Error: `Database table is from an incompatible S3Guard version`
This indicates that the version of S3Guard which created (or possibly updated)
the database table is different from that expected by the S3A
client.
This error will also include the expected and actual version numbers.
If the expected version is lower than the actual version, then the version
of the S3A client library is too old to interact with this S3Guard-managed
bucket. Upgrade the application/library.
If the expected version is higher than the actual version, then the table
itself will need upgrading.
#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"`
S3Guard could not find the DynamoDB table for the Metadata Store,
and it was not configured to create it. Either the table was missing,
or the configuration is preventing S3Guard from finding the table.
1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct.
1. If the region for an existing table has been set in
`fs.s3a.s3guard.ddb.region`, verify that the value is correct.
1. If the region is not set, verify that the table exists in the same
region as the bucket being used.
1. Create the table if necessary.
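For instance, a sketch of the settings which let the S3A client create the
table on demand, reusing the example table name from earlier in this document:
```xml
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value>my-ddb-table-name</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>true</value>
</property>
```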

View File

@ -107,6 +107,10 @@ each filesystem for its testing.
1. `test.fs.s3n.name` : the URL of the bucket for S3n tests
1. `test.fs.s3a.name` : the URL of the bucket for S3a tests
*Note* that running s3a and s3n tests in parallel mode against the same bucket
is unreliable. We recommend using separate buckets or testing one connector
at a time.
The contents of each bucket will be destroyed during the test process:
do not use the bucket for any purpose other than testing. Furthermore, for
s3a, all in-progress multi-part uploads to the bucket will be aborted at the
@ -691,7 +695,7 @@ use requires the presence of secret credentials, where tests may be slow,
and where finding out why something failed from nothing but the test output
is critical.
#### Subclasses Existing Shared Base Blasses
#### Subclasses Existing Shared Base Classes
Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable.
These set things up for testing against the object stores, provide good threadnames,
@ -798,7 +802,7 @@ We really appreciate this &mdash; you will too.
### How to keep your credentials really safe
Although the `auth-keys.xml` file is marged as ignored in git and subversion,
Although the `auth-keys.xml` file is marked as ignored in git and subversion,
it is still in your source tree, and there's always that risk that it may
creep out.
@ -813,3 +817,283 @@ using an absolute XInclude reference to it.
</configuration>
```
# Failure Injection
**Warning: do not enable any type of failure injection in production. The
following settings are for testing only.**
One of the challenges with S3A integration tests is the fact that S3 is an
eventually-consistent storage system. In practice, we rarely see delays in
visibility of recently created objects both in listings (`listStatus()`) and
when getting a single file's metadata (`getFileStatus()`). Since this behavior
is rare and non-deterministic, thorough integration testing is challenging.
To address this, S3A supports a shim layer on top of the `AmazonS3Client`
class which artificially delays certain paths from appearing in listings.
This is implemented in the class `InconsistentAmazonS3Client`.
## Simulating List Inconsistencies
### Enabling the InconsistentAmazonS3Client
There are two ways of enabling the `InconsistentAmazonS3Client`: at
config-time, or programmatically. For an example of programmatic test usage,
see `ITestS3GuardListConsistency`.
To enable the fault-injecting client via configuration, switch the
S3A client to use the "Inconsistent S3 Client Factory" when connecting to
S3:
```xml
<property>
<name>fs.s3a.s3.client.factory.impl</name>
<value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value>
</property>
```
The inconsistent client works by:
1. Choosing which objects will be "inconsistent" at the time the object is
created or deleted.
2. When `listObjects()` is called, any keys that we have marked as
inconsistent above will not be returned in the results (until the
configured delay has elapsed). Similarly, deleted items may still be *returned*
in results, delaying the visibility of the delete.
There are two ways of choosing which keys (filenames) will be affected: By
substring, and by random probability.
```xml
<property>
<name>fs.s3a.failinject.inconsistency.key.substring</name>
<value>DELAY_LISTING_ME</value>
</property>
<property>
<name>fs.s3a.failinject.inconsistency.probability</name>
<value>1.0</value>
</property>
```
By default, any object which has the substring "DELAY_LISTING_ME" in its key
will be subject to delayed visibility. For example, the path
`s3a://my-bucket/test/DELAY_LISTING_ME/file.txt` would match this condition.
To match all keys use the value "\*" (a single asterisk). This is a special
value: *We don't support arbitrary wildcards.*
The default probability of delaying an object is 1.0. This means that *all*
keys that match the substring will get delayed visibility. Note that we take
the logical *and* of the two conditions (substring matches *and* probability
random chance occurs). Here are some example configurations:
```
| substring | probability | behavior |
|-----------|-------------|--------------------------------------------|
| | 0.001 | An empty <value> tag in .xml config will |
| | | be interpreted as unset and revert to the |
| | | default value, "DELAY_LISTING_ME" |
| | | |
| * | 0.001 | 1/1000 chance of *any* key being delayed. |
| | | |
| delay | 0.01 | 1/100 chance of any key containing "delay" |
| | | |
| delay | 1.0 | All keys containing substring "delay" .. |
```
You can also configure how long you want the delay in visibility to last.
The default is 5000 milliseconds (five seconds).
```xml
<property>
<name>fs.s3a.failinject.inconsistency.msec</name>
<value>5000</value>
</property>
```
Future versions of this client will introduce new failure modes,
with simulation of S3 throttling exceptions the next feature under
development.
### Limitations of Inconsistency Injection
Although `InconsistentAmazonS3Client` can delay the visibility of an object
or parent directory, it does not prevent the key of that object from
appearing in all prefix searches. For example, if we create the following
object with the default configuration above, in an otherwise empty bucket:
```
s3a://bucket/a/b/c/DELAY_LISTING_ME
```
Then the following paths will still be visible as directories (ignoring
possible real-world inconsistencies):
```
s3a://bucket/a
s3a://bucket/a/b
```
Whereas `getFileStatus()` on the following *will* be subject to delayed
visibility (`FileNotFoundException` until delay has elapsed):
```
s3a://bucket/a/b/c
s3a://bucket/a/b/c/DELAY_LISTING_ME
```
In real-life S3 inconsistency, however, we expect that all the above paths
(including `a` and `b`) will be subject to delayed visibility.
### Using the `InconsistentAmazonS3Client` in downstream integration tests
The inconsistent client is shipped in the `hadoop-aws` JAR, so it can
be used in applications which work with S3 to see how they handle
inconsistent directory listings.
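A possible minimal configuration for such a downstream test run, reusing the
options described above (the substring and delay shown are simply the
documented defaults, repeated here for completeness):
```xml
<property>
<name>fs.s3a.s3.client.factory.impl</name>
<value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value>
</property>
<property>
<name>fs.s3a.failinject.inconsistency.key.substring</name>
<value>DELAY_LISTING_ME</value>
</property>
<property>
<name>fs.s3a.failinject.inconsistency.msec</name>
<value>5000</value>
</property>
```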
## Testing S3Guard
The basic strategy for testing S3Guard correctness consists of:
1. MetadataStore Contract tests.
The MetadataStore contract tests are inspired by the Hadoop FileSystem and
`FileContext` contract tests. Each implementation of the `MetadataStore` interface
subclasses the `MetadataStoreTestBase` class and customizes it to initialize
their MetadataStore. This test ensures that the different implementations
all satisfy the semantics of the MetadataStore API.
2. Running existing S3A unit and integration tests with S3Guard enabled.
You can run the S3A integration tests on top of S3Guard by configuring your
`MetadataStore` in your
`hadoop-tools/hadoop-aws/src/test/resources/core-site.xml` or
`hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml` files.
Next run the S3A integration tests as outlined in the *Running the Tests* section
of the [S3A documentation](./index.html). A minimal configuration sketch is shown
after this list.
3. Running fault-injection tests that test S3Guard's consistency features.
The `ITestS3GuardListConsistency` uses failure injection to ensure
that list consistency logic is correct even when the underlying storage is
eventually consistent.
The integration test adds a shim above the Amazon S3 Client layer that injects
delays in object visibility.
All of these tests will be run if you follow the steps listed in step 2 above.
No charges are incurred for using this store, and its consistency
guarantees are that of the underlying object store instance. <!-- :) -->
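As referenced in step 2 above, here is a minimal sketch of a test-time
configuration enabling the DynamoDB Metadata Store; the table name is an
example, and table creation is enabled so the test run can create it:
```xml
<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value>my-s3guard-test-table</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>true</value>
</property>
```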
## Testing S3A with S3Guard Enabled
All the S3A tests which work with a private repository can be configured to
run with S3Guard by using the `s3guard` profile. When set, this will run
all the tests with the local, in-memory metadata store in "non-authoritative" mode.
```bash
mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard
```
When the `s3guard` profile is enabled, the following profiles can be specified:
* `dynamo`: use an AWS-hosted DynamoDB table, creating the table if it does
not exist. You will have to pay for the DynamoDB web service.
* `dynamodblocal`: use an in-memory DynamoDBLocal server instead of the real AWS
DynamoDB web service, launching the server and creating the table.
You won't be charged for using DynamoDB in these tests. As it runs in-JVM,
the table isn't shared across other tests running in parallel.
* `auth`: treat the S3Guard metadata as authoritative.
```bash
mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard -Ddynamo -Dauth
```
When experimenting with options, it is usually best to run a single test suite
at a time until the operations appear to be working.
```bash
mvn -T 1C verify -Dtest=skip -Dit.test=ITestS3AMiscOperations -Ds3guard -Ddynamo
```
### Notes
1. If the `s3guard` profile is not set, then the S3Guard properties are those
of the test configuration set in `contract-test-options.xml` or `auth-keys.xml`.
If the `s3guard` profile *is* set:
1. The S3Guard options from maven (the dynamo and authoritative flags)
overwrite any previously set in the configuration files.
1. DynamoDB will be configured to create any missing tables.
### Warning About Concurrent Tests
You must not run S3A and S3N tests in parallel on the same bucket. This is
especially true when S3Guard is enabled. S3Guard requires that all clients
that are modifying the bucket have S3Guard enabled, so having S3N
integration tests running in parallel with S3A tests will cause strange
failures.
### Scale Testing MetadataStore Directly
There are some scale tests that exercise Metadata Store implementations
directly. These ensure that S3Guard is robust to things like DynamoDB
throttling, and compare performance for different implementations. These
are included in the scale tests executed when `-Dscale` is passed to
the maven command line.
The two S3Guard scale tests are `ITestDynamoDBMetadataStoreScale` and
`ITestLocalMetadataStoreScale`. To run the DynamoDB test, you will need to
define your table name and region in your test configuration. For example,
the following settings allow us to run `ITestDynamoDBMetadataStoreScale` with
artificially low read and write capacity provisioned, so we can judge the
effects of being throttled by the DynamoDB service:
```xml
<property>
<name>scale.test.operation.count</name>
<value>10</value>
</property>
<property>
<name>scale.test.directory.count</name>
<value>3</value>
</property>
<property>
<name>fs.s3a.scale.test.enabled</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value>my-scale-test</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.region</name>
<value>us-west-2</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
<value>10</value>
</property>
<property>
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
<value>10</value>
</property>
```
### Testing only: Local Metadata Store
There is an in-memory Metadata Store for testing.
```xml
<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore</value>
</property>
```
This is not for use in production.

View File

@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests creating files.
*/
public class ITestS3AContractCreate extends AbstractContractCreateTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering deletes.
*/
public class ITestS3AContractDelete extends AbstractContractDeleteTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.fs.contract.s3a;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
@ -38,12 +39,18 @@ public class ITestS3AContractDistCp extends AbstractContractDistCpTest {
return SCALE_TEST_TIMEOUT_MILLIS;
}
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration newConf = super.createConfiguration();
newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
newConf.setBoolean(FAST_UPLOAD, true);
newConf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
// patch in S3Guard options
maybeEnableS3Guard(newConf);
return newConf;
}

View File

@ -23,6 +23,8 @@ import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering getFileStatus.
*/
@ -46,6 +48,8 @@ public class ITestS3AContractGetFileStatus
S3ATestUtils.disableFilesystemCaching(conf);
// aggressively low page size forces tests to go multipage
conf.setInt(Constants.MAX_PAGING_KEYS, 2);
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
}

View File

@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* Test dir operations on S3A.
*/
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests opening files.
*/
public class ITestS3AContractOpen extends AbstractContractOpenTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -26,12 +26,25 @@ import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering rename.
*/
public class ITestS3AContractRename extends AbstractContractRenameTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -28,6 +28,8 @@ import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* root dir operations against an S3 bucket.
*/
@ -37,6 +39,18 @@ public class ITestS3AContractRootDir extends
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AContractRootDir.class);
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -22,11 +22,25 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* S3A contract tests covering file seek.
*/
public class ITestS3AContractSeek extends AbstractContractSeekTest {
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -26,6 +26,8 @@ import com.amazonaws.services.s3.AmazonS3;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import org.junit.After;
import org.junit.Before;
@ -33,7 +35,8 @@ import org.junit.Rule;
import org.junit.rules.ExpectedException;
/**
* Abstract base class for S3A unit tests using a mock S3 client.
* Abstract base class for S3A unit tests using a mock S3 client and a null
* metadata store.
*/
public abstract class AbstractS3AMockTest {
@ -55,6 +58,10 @@ public abstract class AbstractS3AMockTest {
Configuration conf = new Configuration();
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
S3ClientFactory.class);
// We explicitly disable MetadataStore even if it's configured. For unit
// test we don't issue request to AWS DynamoDB service.
conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
MetadataStore.class);
fs = new S3AFileSystem();
URI uri = URI.create(FS_S3A + "://" + BUCKET);
fs.initialize(uri, conf);

View File

@ -33,6 +33,7 @@ import java.io.IOException;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
/**
* An extension of the contract test base set up for S3A tests.
@ -65,6 +66,18 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
return S3A_TEST_TIMEOUT;
}
/**
* Create a configuration, possibly patching in S3Guard options.
* @return a configuration
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
// patch in S3Guard options
maybeEnableS3Guard(conf);
return conf;
}
protected Configuration getConfiguration() {
return getContract().getConf();
}
@ -99,10 +112,21 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
*/
protected Path writeThenReadFile(String name, int len) throws IOException {
Path path = path(name);
writeThenReadFile(path, len);
return path;
}
/**
* Write a file, read it back, validate the dataset. Overwrites the file
* if it is present
* @param path path to file
* @param len length of file
* @throws IOException any IO problem
*/
protected void writeThenReadFile(Path path, int len) throws IOException {
byte[] data = dataset(len, 'a', 'z');
writeDataset(getFileSystem(), path, data, data.length, 1024 * 1024, true);
ContractTestUtils.verifyFileContents(getFileSystem(), path, data);
return path;
}
/**

View File

@ -140,6 +140,10 @@ public class ITestS3AAWSCredentialsProvider {
createFailingFS(conf);
} catch (AccessDeniedException e) {
// expected
} catch (AWSServiceIOException e) {
GenericTestUtils.assertExceptionContains(
"UnrecognizedClientException", e);
// expected
}
}

View File

@ -25,6 +25,7 @@ import com.amazonaws.services.s3.S3ClientOptions;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.reflect.FieldUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
@ -483,7 +484,7 @@ public class ITestS3AConfiguration {
}
});
assertEquals("username", alice, fs.getUsername());
S3AFileStatus status = fs.getFileStatus(new Path("/"));
FileStatus status = fs.getFileStatus(new Path("/"));
assertEquals("owner in " + status, alice, status.getOwner());
assertEquals("group in " + status, alice, status.getGroup());
}

View File

@ -29,6 +29,7 @@ import org.apache.commons.io.Charsets;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
@ -63,7 +64,7 @@ public class ITestS3ACopyFromLocalFile extends AbstractS3ATestBase {
Path dest = upload(file, true);
assertPathExists("uploaded file not found", dest);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(dest);
FileStatus status = fs.getFileStatus(dest);
assertEquals("File length of " + status,
message.getBytes(ASCII).length, status.getLen());
assertFileTextEquals(dest, message);

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
@ -37,6 +38,7 @@ import java.net.URLEncoder;
import java.nio.file.AccessDeniedException;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3GuardState;
/**
* Tests that credentials can go into the URL. This includes a valid
@ -63,6 +65,11 @@ public class ITestS3ACredentialsInURL extends Assert {
public void testInstantiateFromURL() throws Throwable {
Configuration conf = new Configuration();
// Skip in the case of S3Guard with DynamoDB because it cannot get
// credentials for its own use if they're only in S3 URLs
assumeS3GuardState(false, conf);
String accessKey = conf.get(Constants.ACCESS_KEY);
String secretKey = conf.get(Constants.SECRET_KEY);
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
@ -84,6 +91,7 @@ public class ITestS3ACredentialsInURL extends Assert {
conf.unset(Constants.ACCESS_KEY);
conf.unset(Constants.SECRET_KEY);
fs = S3ATestUtils.createTestFileSystem(conf);
String fsURI = fs.getUri().toString();
assertFalse("FS URI contains a @ symbol", fsURI.contains("@"));
assertFalse("FS URI contains a % symbol", fsURI.contains("%"));
@ -119,13 +127,14 @@ public class ITestS3ACredentialsInURL extends Assert {
Configuration conf = new Configuration();
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
Assume.assumeNotNull(fsname);
assumeS3GuardState(false, conf);
URI original = new URI(fsname);
URI testURI = createUriWithEmbeddedSecrets(original, "user", "//");
conf.set(TEST_FS_S3A_NAME, testURI.toString());
fs = S3ATestUtils.createTestFileSystem(conf);
try {
S3AFileStatus status = fs.getFileStatus(new Path("/"));
fs = S3ATestUtils.createTestFileSystem(conf);
FileStatus status = fs.getFileStatus(new Path("/"));
fail("Expected an AccessDeniedException, got " + status);
} catch (AccessDeniedException e) {
// expected

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.util.concurrent.Callable;
/**
* Tests behavior of a FileNotFound error that happens after open(), i.e. on
* the first read.
*/
public class ITestS3ADelayedFNF extends AbstractS3ATestBase {
/**
* See debugging documentation
* <a href="https://cwiki.apache.org/confluence/display/HADOOP/S3A%3A+FileNotFound+Exception+on+Read">here</a>.
* @throws Exception
*/
@Test
public void testNotFoundFirstRead() throws Exception {
FileSystem fs = getFileSystem();
Path p = path("some-file");
ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22});
final FSDataInputStream in = fs.open(p);
assertDeleted(p, false);
// This should fail since we deleted after the open.
LambdaTestUtils.intercept(FileNotFoundException.class,
new Callable<Integer>() {
@Override
public Integer call() throws Exception {
return in.read();
}
});
}
}

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Test;
import java.io.IOException;
/**
* Tests which exercise treatment of empty/non-empty directories.
*/
public class ITestS3AEmptyDirectory extends AbstractS3ATestBase {
@Test
public void testDirectoryBecomesEmpty() throws Exception {
S3AFileSystem fs = getFileSystem();
// 1. set up non-empty dir
Path dir = path("testEmptyDir");
Path child = path("testEmptyDir/dir2");
mkdirs(child);
S3AFileStatus status = getS3AFileStatus(fs, dir);
assertEmptyDirectory(false, status);
// 2. Make testEmptyDir empty
assertDeleted(child, false);
status = getS3AFileStatus(fs, dir);
assertEmptyDirectory(true, status);
}
private static void assertEmptyDirectory(boolean isEmpty, S3AFileStatus s) {
String msg = "dir is empty";
// Should *not* be Tristate.UNKNOWN since we request a definitive value
// in getS3AFileStatus() below
Tristate expected = Tristate.fromBool(isEmpty);
assertEquals(msg, expected, s.isEmptyDirectory());
}
@Test
public void testDirectoryBecomesNonEmpty() throws Exception {
S3AFileSystem fs = getFileSystem();
// 1. create empty dir
Path dir = path("testEmptyDir");
mkdirs(dir);
S3AFileStatus status = getS3AFileStatus(fs, dir);
assertEmptyDirectory(true, status);
// 2. Make testEmptyDir non-empty
ContractTestUtils.touch(fs, path("testEmptyDir/file1"));
status = getS3AFileStatus(fs, dir);
assertEmptyDirectory(false, status);
}
private S3AFileStatus getS3AFileStatus(S3AFileSystem fs, Path p) throws
IOException {
return fs.innerGetFileStatus(p, true /* want isEmptyDirectory value */);
}
}

View File

@ -18,19 +18,21 @@
package org.apache.hadoop.fs.s3a;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.rm;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfEncryptionTestsDisabled;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import java.io.IOException;
import java.nio.file.AccessDeniedException;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.junit.Test;
import org.apache.hadoop.io.IOUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Concrete class that extends {@link AbstractTestS3AEncryption}
@ -38,17 +40,39 @@ import org.junit.Test;
*/
public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
private static final String SERVICE_AMAZON_S3_STATUS_CODE_403
= "Service: Amazon S3; Status Code: 403;";
private static final String KEY_1
= "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=";
private static final String KEY_2
= "G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=";
private static final String KEY_3
= "NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=";
private static final String KEY_4
= "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=";
private static final int TEST_FILE_LEN = 2048;
/**
* Filesystem created with a different key.
*/
private FileSystem fsKeyB;
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
S3ATestUtils.disableFilesystemCaching(conf);
disableFilesystemCaching(conf);
conf.set(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM,
getSSEAlgorithm().getMethod());
conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY,
"4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=");
conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, KEY_1);
return conf;
}
@Override
public void teardown() throws Exception {
super.teardown();
IOUtils.closeStream(fsKeyB);
}
/**
* This will create and write to a file using encryption key A, then attempt
* to read from it again with encryption key B. This will not work as it
@ -64,26 +88,25 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
final Path[] path = new Path[1];
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
int len = TEST_FILE_LEN;
describe("Create an encrypted file of size " + len);
Path src = path("testCreateFileAndReadWithDifferentEncryptionKey");
writeThenReadFile(src, len);
int len = 2048;
describe("Create an encrypted file of size " + len);
String src = createFilename(len);
path[0] = writeThenReadFile(src, len);
//extract the test FS
FileSystem fileSystem = createNewFileSystemWithSSECKey(
"kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
byte[] data = dataset(len, 'a', 'z');
ContractTestUtils.verifyFileContents(fileSystem, path[0], data);
throw new Exception("Fail");
});
//extract the test FS
fsKeyB = createNewFileSystemWithSSECKey(
"kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
byte[] data = dataset(len, 'a', 'z');
ContractTestUtils.verifyFileContents(fsKeyB, src, data);
return fsKeyB.getFileStatus(src);
});
}
/**
* While each object has it's own key and should be distinct, this verifies
* While each object has its own key and should be distinct, this verifies
* that hadoop treats object keys as a filesystem path. So if a top level
* dir is encrypted with keyA, a sublevel dir cannot be accessed with a
* different keyB.
@ -96,25 +119,20 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testCreateSubdirWithDifferentKey() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
assumeS3GuardState(false, getConfiguration());
final Path[] path = new Path[1];
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
path[0] = S3ATestUtils.createTestPath(
new Path(createFilename("dir/"))
);
Path nestedDirectory = S3ATestUtils.createTestPath(
new Path(createFilename("dir/nestedDir/"))
);
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=");
getFileSystem().mkdirs(path[0]);
fsKeyB.mkdirs(nestedDirectory);
throw new Exception("Exception should be thrown.");
});
rm(getFileSystem(), path[0], true, false);
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
Path base = path("testCreateSubdirWithDifferentKey");
Path nestedDirectory = new Path(base, "nestedDir");
fsKeyB = createNewFileSystemWithSSECKey(
KEY_2);
getFileSystem().mkdirs(base);
fsKeyB.mkdirs(nestedDirectory);
// expected to fail
return fsKeyB.getFileStatus(nestedDirectory);
});
}
/**
@ -130,20 +148,17 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
final Path[] path = new Path[1];
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
int len = 2048;
String src = createFilename(len);
path[0] = writeThenReadFile(src, len);
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=");
fsKeyB.rename(path[0], new Path(createFilename("different-path.txt")));
throw new Exception("Exception should be thrown.");
});
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
int len = TEST_FILE_LEN;
Path src = path(createFilename(len));
writeThenReadFile(src, len);
fsKeyB = createNewFileSystemWithSSECKey(KEY_3);
Path dest = path(createFilename("different-path.txt"));
getFileSystem().mkdirs(dest.getParent());
return fsKeyB.rename(src, dest);
});
}
/**
@ -157,11 +172,11 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
String src = createFilename("original-path.txt");
Path path = writeThenReadFile(src, 2048);
Path newPath = path(createFilename("different-path.txt"));
getFileSystem().rename(path, newPath);
byte[] data = dataset(2048, 'a', 'z');
Path src = path("original-path.txt");
writeThenReadFile(src, TEST_FILE_LEN);
Path newPath = path("different-path.txt");
getFileSystem().rename(src, newPath);
byte[] data = dataset(TEST_FILE_LEN, 'a', 'z');
ContractTestUtils.verifyFileContents(getFileSystem(), newPath, data);
}
@ -175,30 +190,26 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListEncryptedDir() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
assumeS3GuardState(false, getConfiguration());
Path nestedDirectory = S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
);
Path pathABC = path("testListEncryptedDir/a/b/c/");
Path pathAB = pathABC.getParent();
Path pathA = pathAB.getParent();
Path nestedDirectory = createTestPath(pathABC);
assertTrue(getFileSystem().mkdirs(nestedDirectory));
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
fsKeyB.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/"))
), true);
fsKeyB.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/b/"))
), true);
fsKeyB.listFiles(pathA, true);
fsKeyB.listFiles(pathAB, true);
//Until this point, no exception is thrown about access
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
fsKeyB.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
), false);
throw new Exception("Exception should be thrown.");
});
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
fsKeyB.listFiles(pathABC, false);
});
Configuration conf = this.createConfiguration();
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
@ -209,22 +220,13 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
FileSystem unencryptedFileSystem = contract.getTestFileSystem();
//unencrypted can access until the final directory
unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/"))
), true);
unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/b/"))
), true);
intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
"Bad Request (Service: Amazon S3; Status Code: 400; Error" +
" Code: 400 Bad Request;", () -> {
unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
), false);
throw new Exception("Exception should be thrown.");
});
rm(getFileSystem(), path(createFilename("/")), true, false);
unencryptedFileSystem.listFiles(pathA, true);
unencryptedFileSystem.listFiles(pathAB, true);
AWSS3IOException ex = intercept(AWSS3IOException.class,
() -> {
unencryptedFileSystem.listFiles(pathABC, false);
});
assertStatusCode(ex, 400);
}
/**
@ -236,31 +238,27 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListStatusEncryptedDir() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
assumeS3GuardState(false, getConfiguration());
Path nestedDirectory = S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
);
assertTrue(getFileSystem().mkdirs(nestedDirectory));
Path pathABC = path("testListStatusEncryptedDir/a/b/c/");
Path pathAB = pathABC.getParent();
Path pathA = pathAB.getParent();
assertTrue(getFileSystem().mkdirs(pathABC));
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
fsKeyB.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/"))));
fsKeyB.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/b/"))));
fsKeyB.listStatus(pathA);
fsKeyB.listStatus(pathAB);
//Until this point, no exception is thrown about access
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
fsKeyB.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))));
throw new Exception("Exception should be thrown.");
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
fsKeyB.listStatus(pathABC);
});
//Now try it with an unencrypted filesystem.
Configuration conf = this.createConfiguration();
Configuration conf = createConfiguration();
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
conf.unset(Constants.SERVER_SIDE_ENCRYPTION_KEY);
@ -269,20 +267,14 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
FileSystem unencryptedFileSystem = contract.getTestFileSystem();
//unencrypted can access until the final directory
unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/"))));
unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/b/"))));
unencryptedFileSystem.listStatus(pathA);
unencryptedFileSystem.listStatus(pathAB);
intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
"Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400" +
" Bad Request;", () -> {
unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))));
throw new Exception("Exception should be thrown.");
AWSS3IOException ex = intercept(AWSS3IOException.class,
() -> {
unencryptedFileSystem.listStatus(pathABC);
});
rm(getFileSystem(), path(createFilename("/")), true, false);
assertStatusCode(ex, 400);
}
/**
@ -294,31 +286,24 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testListStatusEncryptedFile() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
assumeS3GuardState(false, getConfiguration());
Path pathABC = path("testListStatusEncryptedFile/a/b/c/");
assertTrue(getFileSystem().mkdirs(pathABC));
Path nestedDirectory = S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
);
assertTrue(getFileSystem().mkdirs(nestedDirectory));
Path fileToStat = new Path(pathABC, "fileToStat.txt");
writeThenReadFile(fileToStat, TEST_FILE_LEN);
String src = createFilename("/a/b/c/fileToStat.txt");
Path fileToStat = writeThenReadFile(src, 2048);
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
//Until this point, no exception is thrown about access
intercept(java.nio.file.AccessDeniedException.class,
"Service: Amazon S3; Status Code: 403;", () -> {
fsKeyB.listStatus(S3ATestUtils.createTestPath(fileToStat));
throw new Exception("Exception should be thrown.");
});
rm(getFileSystem(), path(createFilename("/")), true, false);
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
fsKeyB.listStatus(fileToStat);
});
}
/**
* It is possible to delete directories without the proper encryption key and
* the hierarchy above it.
@ -329,31 +314,26 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
public void testDeleteEncryptedObjectWithDifferentKey() throws Exception {
assumeEnabled();
skipIfEncryptionTestsDisabled(getConfiguration());
assumeS3GuardState(false, getConfiguration());
Path pathABC = path("testDeleteEncryptedObjectWithDifferentKey/a/b/c/");
Path nestedDirectory = S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))
);
assertTrue(getFileSystem().mkdirs(nestedDirectory));
String src = createFilename("/a/b/c/filetobedeleted.txt");
Path fileToDelete = writeThenReadFile(src, 2048);
FileSystem fsKeyB = createNewFileSystemWithSSECKey(
"msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
intercept(java.nio.file.AccessDeniedException.class,
"Forbidden (Service: Amazon S3; Status Code: 403; Error Code: " +
"403 Forbidden", () -> {
fsKeyB.delete(fileToDelete, false);
throw new Exception("Exception should be thrown.");
});
Path pathAB = pathABC.getParent();
Path pathA = pathAB.getParent();
assertTrue(getFileSystem().mkdirs(pathABC));
Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
writeThenReadFile(fileToDelete, TEST_FILE_LEN);
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
intercept(AccessDeniedException.class,
SERVICE_AMAZON_S3_STATUS_CODE_403,
() -> {
fsKeyB.delete(fileToDelete, false);
});
//This is possible
fsKeyB.delete(S3ATestUtils.createTestPath(
path(createFilename("/a/b/c/"))), true);
fsKeyB.delete(S3ATestUtils.createTestPath(
path(createFilename("/a/b/"))), true);
fsKeyB.delete(S3ATestUtils.createTestPath(
path(createFilename("/a/"))), true);
fsKeyB.delete(pathABC, true);
fsKeyB.delete(pathAB, true);
fsKeyB.delete(pathA, true);
assertPathDoesNotExist("expected recursive delete", fileToDelete);
}
private FileSystem createNewFileSystemWithSSECKey(String sseCKey) throws
@ -371,4 +351,5 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
protected S3AEncryptionMethods getSSEAlgorithm() {
return S3AEncryptionMethods.SSE_C;
}
}
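The per-key filesystem creation used by these tests follows a small, repeatable pattern. A minimal sketch of it, assuming the usual S3A test credentials are configured and that S3AEncryptionMethods.getMethod() yields the algorithm name; the class name, bucket URI and key value below are placeholders only:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AEncryptionMethods;
public class SseCFileSystemSketch {
  public static FileSystem openWithSseCKey(URI bucket, String base64Key)
      throws Exception {
    Configuration conf = new Configuration();
    // Select SSE-C and supply the (placeholder) base64-encoded 256-bit key.
    conf.set(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM,
        S3AEncryptionMethods.SSE_C.getMethod());
    conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, base64Key);
    // newInstance() avoids handing back a cached FS bound to a different key.
    return FileSystem.newInstance(bucket, conf);
  }
}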

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
@ -32,8 +33,8 @@ import java.net.URI;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff;
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
import static org.junit.Assume.assumeFalse;
/**
* Use metrics to assert about the cost of file status queries.
@ -62,9 +63,11 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
S3AFileSystem fs = getFileSystem();
touch(fs, simpleFile);
resetMetricDiffs();
S3AFileStatus status = fs.getFileStatus(simpleFile);
FileStatus status = fs.getFileStatus(simpleFile);
assertTrue("not a file: " + status, status.isFile());
metadataRequests.assertDiffEquals(1);
if (!fs.hasMetadataStore()) {
metadataRequests.assertDiffEquals(1);
}
listRequests.assertDiffEquals(0);
}
@ -79,9 +82,13 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path dir = path("empty");
fs.mkdirs(dir);
resetMetricDiffs();
S3AFileStatus status = fs.getFileStatus(dir);
assertTrue("not empty: " + status, status.isEmptyDirectory());
metadataRequests.assertDiffEquals(2);
S3AFileStatus status = fs.innerGetFileStatus(dir, true);
assertTrue("not empty: " + status,
status.isEmptyDirectory() == Tristate.TRUE);
if (!fs.hasMetadataStore()) {
metadataRequests.assertDiffEquals(2);
}
listRequests.assertDiffEquals(0);
}
@ -92,7 +99,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path path = path("missing");
resetMetricDiffs();
try {
S3AFileStatus status = fs.getFileStatus(path);
FileStatus status = fs.getFileStatus(path);
fail("Got a status back from a missing file path " + status);
} catch (FileNotFoundException expected) {
// expected
@ -108,7 +115,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path path = path("missingdir/missingpath");
resetMetricDiffs();
try {
S3AFileStatus status = fs.getFileStatus(path);
FileStatus status = fs.getFileStatus(path);
fail("Got a status back from a missing file path " + status);
} catch (FileNotFoundException expected) {
// expected
@ -126,16 +133,18 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
Path simpleFile = new Path(dir, "simple.txt");
touch(fs, simpleFile);
resetMetricDiffs();
S3AFileStatus status = fs.getFileStatus(dir);
if (status.isEmptyDirectory()) {
S3AFileStatus status = fs.innerGetFileStatus(dir, true);
if (status.isEmptyDirectory() == Tristate.TRUE) {
// erroneous state
String fsState = fs.toString();
fail("FileStatus says directory isempty: " + status
+ "\n" + ContractTestUtils.ls(fs, dir)
+ "\n" + fsState);
}
metadataRequests.assertDiffEquals(2);
listRequests.assertDiffEquals(1);
if (!fs.hasMetadataStore()) {
metadataRequests.assertDiffEquals(2);
listRequests.assertDiffEquals(1);
}
}
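// Illustrative sketch only, not part of this change: the MetricDiff pattern
// used by the tests above, spelled out explicitly. The method name is
// hypothetical; OBJECT_METADATA_REQUESTS is the statistic counted by the
// metadataRequests diff.
private void sketchOfMetricDiffUsage() throws Exception {
  S3AFileSystem fs = getFileSystem();
  Path file = path("metricDiffSketch.txt");
  touch(fs, file);
  MetricDiff headRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
  fs.getFileStatus(file);
  // Without a metadata store, getFileStatus() on a file costs one HEAD
  // request; with S3Guard the lookup may be served from the MetadataStore,
  // so the assertion is only meaningful in the raw S3 case.
  if (!fs.hasMetadataStore()) {
    headRequests.assertDiffEquals(1);
  }
}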
@Test
@ -187,6 +196,13 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
+ "In S3, rename deletes any fake directories as a part of "
+ "clean up activity");
S3AFileSystem fs = getFileSystem();
// As this test uses the s3 metrics to count the number of fake directory
// operations, it depends on side effects happening internally. With a
// metadata store enabled those side effects change, making the assertions
// brittle, so we disable this test rather than tie it to the internal
// behavior w/ or w/o a metadata store.
assumeFalse(fs.hasMetadataStore());
Path srcBaseDir = path("src");
mkdirs(srcBaseDir);
MetricDiff deleteRequests =

View File

@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystemContractBaseTest;
import org.apache.hadoop.fs.Path;
import static org.junit.Assume.*;
import static org.junit.Assert.*;

View File

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.util.concurrent.Callable;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
/**
* Tests S3A behavior under forced inconsistency via {@link
* InconsistentAmazonS3Client}.
*
* These tests are for validating expected behavior *without* S3Guard, but
* may also run with S3Guard enabled. For tests that validate S3Guard's
* consistency features, see {@link ITestS3GuardListConsistency}.
*/
public class ITestS3AInconsistency extends AbstractS3ATestBase {
@Override
protected AbstractFSContract createContract(Configuration conf) {
conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
S3ClientFactory.class);
conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
return new S3AContract(conf);
}
@Test
public void testGetFileStatus() throws Exception {
S3AFileSystem fs = getFileSystem();
// 1. Make sure no ancestor dirs exist
Path dir = path("ancestor");
fs.delete(dir, true);
waitUntilDeleted(dir);
// 2. Create a descendant file, which implicitly creates ancestors
// This file has delayed visibility.
touch(getFileSystem(),
path("ancestor/file-" + DEFAULT_DELAY_KEY_SUBSTRING));
// 3. Assert expected behavior. If S3Guard is enabled, we should be able
// to get status for ancestor. If S3Guard is *not* enabled, S3A will
// fail to infer the existence of the ancestor since visibility of the
// child file is delayed, and its key prefix search will return nothing.
try {
FileStatus status = fs.getFileStatus(dir);
if (fs.hasMetadataStore()) {
assertTrue("Ancestor is dir", status.isDirectory());
} else {
fail("getFileStatus should fail due to delayed visibility.");
}
} catch (FileNotFoundException e) {
if (fs.hasMetadataStore()) {
fail("S3Guard failed to list parent of inconsistent child.");
}
LOG.info("File not found, as expected.");
}
}
private void waitUntilDeleted(final Path p) throws Exception {
LambdaTestUtils.eventually(30 * 1000, 1000,
new Callable<Void>() {
@Override
public Void call() throws Exception {
assertPathDoesNotExist("Dir should be deleted", p);
return null;
}
}
);
}
}
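The waitUntilDeleted() helper above uses an anonymous Callable; the same polling reads more compactly as a lambda. A sketch, relying only on the eventually(timeout, interval, callable) overload already used above:
private void waitUntilDeletedLambda(final Path p) throws Exception {
  // Poll for up to 30s, checking once a second, until the path is gone.
  LambdaTestUtils.eventually(30 * 1000, 1000, () -> {
    assertPathDoesNotExist("Dir should be deleted", p);
    return null;
  });
}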

View File

@ -22,10 +22,17 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.test.LambdaTestUtils;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.concurrent.Callable;
/**
* Tests of the S3A FileSystem which don't have a specific home and can share
@ -55,6 +62,26 @@ public class ITestS3AMiscOperations extends AbstractS3ATestBase {
createNonRecursive(new Path(parent, "fail"));
}
@Test
public void testPutObjectDirect() throws Throwable {
final S3AFileSystem fs = getFileSystem();
ObjectMetadata metadata = fs.newObjectMetadata(-1);
metadata.setContentLength(-1);
Path path = path("putDirect");
final PutObjectRequest put = new PutObjectRequest(fs.getBucket(),
path.toUri().getPath(),
new ByteArrayInputStream("PUT".getBytes()),
metadata);
LambdaTestUtils.intercept(IllegalStateException.class,
new Callable<PutObjectResult>() {
@Override
public PutObjectResult call() throws Exception {
return fs.putObjectDirect(put);
}
});
assertPathDoesNotExist("put object was created", path);
}
private FSDataOutputStream createNonRecursive(Path path) throws IOException {
return getFileSystem().createNonRecursive(path, false, 4096,
(short) 3, (short) 4096,

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.junit.Assume;
import org.junit.Test;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
/**
* Home for testing the creation of new files and directories with S3Guard
* enabled.
*/
public class ITestS3GuardCreate extends AbstractS3ATestBase {
/**
* Test that ancestor creation during S3AFileSystem#create() is properly
* accounted for in the MetadataStore. This should be handled by the
* FileSystem, and be a FS contract test, but S3A does not handle ancestors on
* create(), so we need to take care in the S3Guard code to do the right
* thing. This may change: See HADOOP-13221 for more detail.
*/
@Test
public void testCreatePopulatesFileAncestors() throws Exception {
final S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
final MetadataStore ms = fs.getMetadataStore();
final Path parent = path("testCreatePopulatesFileAncestors");
try {
fs.mkdirs(parent);
final Path nestedFile = new Path(parent, "dir1/dir2/file4");
touch(fs, nestedFile);
DirListingMetadata list = ms.listChildren(parent);
assertFalse("MetadataStore falsely reports authoritative empty list",
list.isEmpty() == Tristate.TRUE);
} finally {
fs.delete(parent, true);
}
}
}

View File

@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import org.junit.Assume;
import org.junit.Test;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
/**
* Test logic around whether or not a directory is empty, with S3Guard enabled.
* The fact that S3AFileStatus has an isEmptyDirectory flag in it makes caching
* S3AFileStatus instances really tricky, as the flag can change as a side
* effect of changes to other paths.
* After S3Guard is merged to trunk, we should try to remove the
* isEmptyDirectory flag from S3AFileStatus, or maintain it outside
* of the MetadataStore.
*/
public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
@Test
public void testEmptyDirs() throws Exception {
S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
MetadataStore configuredMs = fs.getMetadataStore();
Path existingDir = path("existing-dir");
Path existingFile = path("existing-dir/existing-file");
try {
// 1. Simulate files already existing in the bucket before we started our
// cluster. Temporarily disable the MetadataStore so it doesn't witness
// us creating these files.
fs.setMetadataStore(new NullMetadataStore());
assertTrue(fs.mkdirs(existingDir));
touch(fs, existingFile);
// 2. Simulate (from MetadataStore's perspective) starting our cluster and
// creating a file in an existing directory.
fs.setMetadataStore(configuredMs); // "start cluster"
Path newFile = path("existing-dir/new-file");
touch(fs, newFile);
S3AFileStatus status = fs.innerGetFileStatus(existingDir, true);
assertEquals("Should not be empty dir", Tristate.FALSE,
status.isEmptyDirectory());
// 3. Assert that removing the only file the MetadataStore witnessed
// being created doesn't cause it to think the directory is now empty.
fs.delete(newFile, false);
status = fs.innerGetFileStatus(existingDir, true);
assertEquals("Should not be empty dir", Tristate.FALSE,
status.isEmptyDirectory());
// 4. Assert that removing the final file, that existed "before"
// MetadataStore started, *does* cause the directory to be marked empty.
fs.delete(existingFile, false);
status = fs.innerGetFileStatus(existingDir, true);
assertEquals("Should be empty dir now", Tristate.TRUE,
status.isEmptyDirectory());
} finally {
configuredMs.forgetMetadata(existingFile);
configuredMs.forgetMetadata(existingDir);
}
}
}
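The Tristate returned by isEmptyDirectory() is the heart of the caching problem described in the class javadoc. A minimal sketch of how calling code has to treat it; the helper name is hypothetical:
// A cached S3AFileStatus is only trustworthy for the empty-directory
// question when the answer is definite; UNKNOWN forces a live check.
static boolean needsListingToDetermineEmptiness(S3AFileStatus status) {
  switch (status.isEmptyDirectory()) {
  case TRUE:
  case FALSE:
    return false;   // definite answer, the cached value can be used
  case UNKNOWN:
  default:
    return true;    // must go back to S3 (or the MetadataStore) and list
  }
}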

View File

@ -0,0 +1,544 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.junit.Assume;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;
/**
* Test S3Guard list consistency feature by injecting delayed listObjects()
* visibility via {@link InconsistentAmazonS3Client}.
*
* Tests here generally:
* 1. Use the inconsistency injection mentioned above.
* 2. Only run when S3Guard is enabled.
*/
public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
@Override
protected AbstractFSContract createContract(Configuration conf) {
conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
S3ClientFactory.class);
// Other configs would break test assumptions
conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
return new S3AContract(conf);
}
/**
* Helper function for other test cases: does a single rename operation and
* validates the aftermath.
* @param mkdirs Directories to create
* @param srcdirs Source paths for rename operation
* @param dstdirs Destination paths for rename operation
* @param yesdirs Files that must exist post-rename (e.g. dstdirs children)
* @param nodirs Files that must not exist post-rename (e.g. srcdirs children)
* @throws Exception
*/
private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
if (mkdirs != null) {
for (Path mkdir : mkdirs) {
assertTrue(fs.mkdirs(mkdir));
}
clearInconsistency(fs);
}
assertTrue("srcdirs and dstdirs must have equal length",
srcdirs.length == dstdirs.length);
for (int i = 0; i < srcdirs.length; i++) {
assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
fs.rename(srcdirs[i], dstdirs[i]));
}
for (Path yesdir : yesdirs) {
assertTrue("Path was supposed to exist: " + yesdir, fs.exists(yesdir));
}
for (Path nodir : nodirs) {
assertFalse("Path is not supposed to exist: " + nodir, fs.exists(nodir));
}
}
/**
* Tests that after renaming a directory, the original directory and its
* contents are indeed missing and the corresponding new paths are visible.
* @throws Exception
*/
@Test
public void testConsistentListAfterRename() throws Exception {
Path[] mkdirs = {
path("d1/f"),
path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)
};
Path[] srcdirs = {path("d1")};
Path[] dstdirs = {path("d2")};
Path[] yesdirs = {path("d2"), path("d2/f"),
path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
Path[] nodirs = {path("d1"), path("d1/f"),
path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
getFileSystem().delete(path("d1"), true);
getFileSystem().delete(path("d2"), true);
}
/**
* Tests a circular sequence of renames to verify that overwriting recently
* deleted files and reading recently created files from rename operations
* works as expected.
* @throws Exception
*/
@Test
public void testRollingRenames() throws Exception {
Path[] dir0 = {path("rolling/1")};
Path[] dir1 = {path("rolling/2")};
Path[] dir2 = {path("rolling/3")};
// These sets have to be in reverse order compared to the movement
Path[] setA = {dir1[0], dir0[0]};
Path[] setB = {dir2[0], dir1[0]};
Path[] setC = {dir0[0], dir2[0]};
for(int i = 0; i < 2; i++) {
Path[] firstSet = i == 0 ? setA : null;
doTestRenameSequence(firstSet, setA, setB, setB, dir0);
doTestRenameSequence(null, setB, setC, setC, dir1);
doTestRenameSequence(null, setC, setA, setA, dir2);
}
S3AFileSystem fs = getFileSystem();
assertFalse("Renaming deleted file should have failed",
fs.rename(dir2[0], dir1[0]));
assertTrue("Renaming over existing file should have succeeded",
fs.rename(dir1[0], dir0[0]));
}
/**
* Tests that deleted files immediately stop manifesting in list operations
* even when the effect in S3 is delayed.
* @throws Exception
*/
@Test
public void testConsistentListAfterDelete() throws Exception {
S3AFileSystem fs = getFileSystem();
// test will fail if NullMetadataStore (the default) is configured: skip it.
Assume.assumeTrue(fs.hasMetadataStore());
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
inconsistentPath};
for (Path path : testDirs) {
assertTrue(fs.mkdirs(path));
}
clearInconsistency(fs);
for (Path path : testDirs) {
assertTrue(fs.delete(path, false));
}
FileStatus[] paths = fs.listStatus(path("a/b/"));
List<Path> list = new ArrayList<>();
for (FileStatus fileState : paths) {
list.add(fileState.getPath());
}
assertFalse(list.contains(path("a/b/dir1")));
assertFalse(list.contains(path("a/b/dir2")));
// This should fail without S3Guard, and succeed with it.
assertFalse(list.contains(inconsistentPath));
}
/**
* Tests that a rename performed immediately after deleting files in the
* source directory results in exactly the correct set of destination files
* and none of the source files.
* @throws Exception
*/
@Test
public void testConsistentRenameAfterDelete() throws Exception {
S3AFileSystem fs = getFileSystem();
// test will fail if NullMetadataStore (the default) is configured: skip it.
Assume.assumeTrue(fs.hasMetadataStore());
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
inconsistentPath};
for (Path path : testDirs) {
assertTrue(fs.mkdirs(path));
}
clearInconsistency(fs);
assertTrue(fs.delete(testDirs[1], false));
assertTrue(fs.delete(testDirs[2], false));
fs.rename(path("a"), path("a3"));
FileStatus[] paths = fs.listStatus(path("a3/b"));
List<Path> list = new ArrayList<>();
for (FileStatus fileState : paths) {
list.add(fileState.getPath());
}
assertTrue(list.contains(path("a3/b/dir1")));
assertFalse(list.contains(path("a3/b/dir2")));
// This should fail without S3Guard, and succeed with it.
assertFalse(list.contains(path("a3/b/dir3-" +
DEFAULT_DELAY_KEY_SUBSTRING)));
try {
RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
path("a"), true);
fail("Recently renamed dir should not be visible");
} catch(FileNotFoundException e) {
// expected
}
}
@Test
public void testConsistentListStatusAfterPut() throws Exception {
S3AFileSystem fs = getFileSystem();
// This test will fail if NullMetadataStore (the default) is configured:
// skip it.
Assume.assumeTrue(fs.hasMetadataStore());
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
Path inconsistentPath =
path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path[] testDirs = {path("a/b/dir1"),
path("a/b/dir2"),
inconsistentPath};
for (Path path : testDirs) {
assertTrue(fs.mkdirs(path));
}
FileStatus[] paths = fs.listStatus(path("a/b/"));
List<Path> list = new ArrayList<>();
for (FileStatus fileState : paths) {
list.add(fileState.getPath());
}
assertTrue(list.contains(path("a/b/dir1")));
assertTrue(list.contains(path("a/b/dir2")));
// This should fail without S3Guard, and succeed with it.
assertTrue(list.contains(inconsistentPath));
}
/**
* Similar to {@link #testConsistentListStatusAfterPut()}, this tests that the
* FS listLocatedStatus() call will return a consistent list.
*/
@Test
public void testConsistentListLocatedStatusAfterPut() throws Exception {
final S3AFileSystem fs = getFileSystem();
// This test will fail if NullMetadataStore (the default) is configured:
// skip it.
Assume.assumeTrue(fs.hasMetadataStore());
String rootDir = "doTestConsistentListLocatedStatusAfterPut";
fs.mkdirs(path(rootDir));
final int[] numOfPaths = {0, 1, 5};
for (int normalPathNum : numOfPaths) {
for (int delayedPathNum : new int[] {0, 2}) {
LOG.info("Testing with normalPathNum={}, delayedPathNum={}",
normalPathNum, delayedPathNum);
doTestConsistentListLocatedStatusAfterPut(fs, rootDir, normalPathNum,
delayedPathNum);
}
}
}
/**
* Helper method to implement the tests of consistent listLocatedStatus().
* @param fs The S3 file system from the contract
* @param rootDir base directory under which the test paths are created
* @param normalPathNum number of paths listed directly from S3 without delay
* @param delayedPathNum number of paths listed with delayed visibility
* @throws Exception
*/
private void doTestConsistentListLocatedStatusAfterPut(S3AFileSystem fs,
String rootDir, int normalPathNum, int delayedPathNum) throws Exception {
final List<Path> testDirs = new ArrayList<>(normalPathNum + delayedPathNum);
int index = 0;
for (; index < normalPathNum; index++) {
testDirs.add(path(rootDir + "/dir-" +
index));
}
for (; index < normalPathNum + delayedPathNum; index++) {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
testDirs.add(path(rootDir + "/dir-" + index +
DEFAULT_DELAY_KEY_SUBSTRING));
}
for (Path path : testDirs) {
// delete the old test path (if any) so that when we call mkdirs() later,
// the to-be-delayed directories will be tracked via the putObject() request.
fs.delete(path, true);
assertTrue(fs.mkdirs(path));
}
// this should return the union data from S3 and MetadataStore
final RemoteIterator<LocatedFileStatus> statusIterator =
fs.listLocatedStatus(path(rootDir + "/"));
List<Path> list = new ArrayList<>();
for (; statusIterator.hasNext();) {
list.add(statusIterator.next().getPath());
}
// This should fail without S3Guard, and succeed with it, because some of the
// children under the test path have delayed visibility
for (Path path : testDirs) {
assertTrue("listLocatedStatus should list " + path, list.contains(path));
}
}
/**
* Tests that the S3AFS listFiles() call will return a consistent file list.
*/
@Test
public void testConsistentListFiles() throws Exception {
final S3AFileSystem fs = getFileSystem();
// This test will fail if NullMetadataStore (the default) is configured:
// skip it.
Assume.assumeTrue(fs.hasMetadataStore());
final int[] numOfPaths = {0, 2};
for (int dirNum : numOfPaths) {
for (int normalFile : numOfPaths) {
for (int delayedFile : new int[] {0, 1}) {
for (boolean recursive : new boolean[] {true, false}) {
doTestListFiles(fs, dirNum, normalFile, delayedFile, recursive);
}
}
}
}
}
/**
* Helper method to implement the tests of consistent listFiles().
*
* The file structure has dirNum subdirectories, and each directory (including
* the test base directory itself) has normalFileNum normal files and
* delayedFileNum delayed files.
*
* @param fs The S3 file system from the contract
* @param dirNum number of subdirectories
* @param normalFileNum number of files in each directory listed without delay
* @param delayedFileNum number of files in each directory with delayed visibility
* @param recursive listFiles recursively if true
* @throws Exception if any unexpected error
*/
private void doTestListFiles(S3AFileSystem fs, int dirNum, int normalFileNum,
int delayedFileNum, boolean recursive) throws Exception {
describe("Testing dirNum=%d, normalFile=%d, delayedFile=%d, "
+ "recursive=%s", dirNum, normalFileNum, delayedFileNum, recursive);
final Path baseTestDir = path("doTestListFiles-" + dirNum + "-"
+ normalFileNum + "-" + delayedFileNum + "-" + recursive);
// delete the old test path (if any) so that when we call mkdirs() later,
// the to-be-delayed subdirectories will be tracked via the putObject() request.
fs.delete(baseTestDir, true);
// make subdirectories (if any)
final List<Path> testDirs = new ArrayList<>(dirNum + 1);
assertTrue(fs.mkdirs(baseTestDir));
testDirs.add(baseTestDir);
for (int i = 0; i < dirNum; i++) {
final Path subdir = path(baseTestDir + "/dir-" + i);
assertTrue(fs.mkdirs(subdir));
testDirs.add(subdir);
}
final Collection<String> fileNames
= new ArrayList<>(normalFileNum + delayedFileNum);
int index = 0;
for (; index < normalFileNum; index++) {
fileNames.add("file-" + index);
}
for (; index < normalFileNum + delayedFileNum; index++) {
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
// in listObjects() results via InconsistentS3Client
fileNames.add("file-" + index + "-" + DEFAULT_DELAY_KEY_SUBSTRING);
}
int filesAndEmptyDirectories = 0;
// create files under each test directory
for (Path dir : testDirs) {
for (String fileName : fileNames) {
writeTextFile(fs, new Path(dir, fileName), "I, " + fileName, false);
filesAndEmptyDirectories++;
}
}
// this should return the union data from S3 and MetadataStore
final RemoteIterator<LocatedFileStatus> statusIterator
= fs.listFiles(baseTestDir, recursive);
final Collection<Path> listedFiles = new HashSet<>();
for (; statusIterator.hasNext();) {
final FileStatus status = statusIterator.next();
assertTrue("FileStatus " + status + " is not a file!", status.isFile());
listedFiles.add(status.getPath());
}
LOG.info("S3AFileSystem::listFiles('{}', {}) -> {}",
baseTestDir, recursive, listedFiles);
// This should fail without S3Guard, and succeed with it, because some of the
// files to list have delayed visibility
if (!recursive) {
// in this case only the top level files are listed
assertEquals("Unexpected number of files returned by listFiles() call",
normalFileNum + delayedFileNum, listedFiles.size());
verifyFileIsListed(listedFiles, baseTestDir, fileNames);
} else {
assertEquals("Unexpected number of files returned by listFiles() call",
filesAndEmptyDirectories,
listedFiles.size());
for (Path dir : testDirs) {
verifyFileIsListed(listedFiles, dir, fileNames);
}
}
}
private static void verifyFileIsListed(Collection<Path> listedFiles,
Path currentDir, Collection<String> fileNames) {
for (String fileName : fileNames) {
final Path file = new Path(currentDir, fileName);
assertTrue(file + " should have been listed", listedFiles.contains(file));
}
}
@Test
public void testCommitByRenameOperations() throws Throwable {
S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
Path task00 = new Path(work, "task00");
fs.mkdirs(task00);
String name = "part-00";
try (FSDataOutputStream out =
fs.create(new Path(task00, name), false)) {
out.writeChars("hello");
}
for (FileStatus stat : fs.listStatus(task00)) {
fs.rename(stat.getPath(), work);
}
List<FileStatus> files = new ArrayList<>(2);
for (FileStatus stat : fs.listStatus(work)) {
if (stat.isFile()) {
files.add(stat);
}
}
assertFalse("renamed file " + name + " not found in " + work,
files.isEmpty());
assertEquals("more files found than expected in " + work
+ " " + ls(work), 1, files.size());
FileStatus status = files.get(0);
assertEquals("Wrong filename in " + status,
name, status.getPath().getName());
}
@Test
public void testInconsistentS3ClientDeletes() throws Throwable {
S3AFileSystem fs = getFileSystem();
Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING);
for (int i = 0; i < 3; i++) {
fs.mkdirs(new Path(root, "dir" + i));
touch(fs, new Path(root, "file" + i));
for (int j = 0; j < 3; j++) {
touch(fs, new Path(new Path(root, "dir" + i), "file" + i + "-" + j));
}
}
clearInconsistency(fs);
AmazonS3 client = fs.getAmazonS3Client();
String key = fs.pathToKey(root) + "/";
ObjectListing preDeleteDelimited = client.listObjects(
fs.createListObjectsRequest(key, "/"));
ObjectListing preDeleteUndelimited = client.listObjects(
fs.createListObjectsRequest(key, null));
fs.delete(root, true);
ObjectListing postDeleteDelimited = client.listObjects(
fs.createListObjectsRequest(key, "/"));
ObjectListing postDeleteUndelimited = client.listObjects(
fs.createListObjectsRequest(key, null));
assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
"in a non-recursive listing",
preDeleteDelimited.getObjectSummaries().size(),
postDeleteDelimited.getObjectSummaries().size()
);
assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
"in a non-recursive listing",
preDeleteDelimited.getCommonPrefixes().size(),
postDeleteDelimited.getCommonPrefixes().size()
);
assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
"in a recursive listing",
preDeleteUndelimited.getObjectSummaries().size(),
postDeleteUndelimited.getObjectSummaries().size()
);
assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
"in a recursive listing",
preDeleteUndelimited.getCommonPrefixes().size(),
postDeleteUndelimited.getCommonPrefixes().size()
);
}
private static void clearInconsistency(S3AFileSystem fs) throws Exception {
AmazonS3 s3 = fs.getAmazonS3Client();
InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3);
ic.clearInconsistency();
}
}

View File

@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.junit.Assume;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
/**
* Test cases that validate S3Guard's behavior for writing things like
* directory listings back to the MetadataStore.
*/
public class ITestS3GuardWriteBack extends AbstractS3ATestBase {
/**
* In listStatus(), when S3Guard is enabled, the full listing for a
* directory is "written back" to the MetadataStore before the listing is
* returned. Currently this "write back" behavior occurs when
* fs.s3a.metadatastore.authoritative is true. This test validates this
* behavior.
* @throws Exception on failure
*/
@Test
public void testListStatusWriteBack() throws Exception {
Assume.assumeTrue(getFileSystem().hasMetadataStore());
Path directory = path("ListStatusWriteBack");
// "raw" S3AFileSystem without S3Guard
S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false);
// Another with S3Guard and write-back disabled
S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false);
// Another with S3Guard and write-back enabled
S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true);
// delete the existing directory (in case of last test failure)
noS3Guard.delete(directory, true);
// Create a directory on S3 only
noS3Guard.mkdirs(new Path(directory, "OnS3"));
// Create a directory on both S3 and metadata store
Path p = new Path(directory, "OnS3AndMS");
assertPathDoesntExist(noWriteBack, p);
noWriteBack.mkdirs(p);
FileStatus[] fsResults;
DirListingMetadata mdResults;
// FS should return both even though S3Guard is not writing back to MS
fsResults = noWriteBack.listStatus(directory);
assertEquals("Filesystem enabled S3Guard without write back should have "
+ "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
2, fsResults.length);
// Metadata store without write-back should still only contain /OnS3AndMS,
// because newly discovered /OnS3 is not written back to metadata store
mdResults = noWriteBack.getMetadataStore().listChildren(directory);
assertEquals("Metadata store without write back should still only know "
+ "about /OnS3AndMS, but it has: " + mdResults,
1, mdResults.numEntries());
// FS should return both (and will write it back)
fsResults = yesWriteBack.listStatus(directory);
assertEquals("Filesystem enabled S3Guard with write back should have "
+ " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
2, fsResults.length);
// Metadata store with write-back should contain both because the newly
// discovered /OnS3 should have been written back to metadata store
mdResults = yesWriteBack.getMetadataStore().listChildren(directory);
assertEquals("Unexpected number of results from metadata store. "
+ "Should have /OnS3 and /OnS3AndMS: " + mdResults,
2, mdResults.numEntries());
// If we don't clean this up, the next test run will fail because it will
// have recorded /OnS3 being deleted even after it's written to noS3Guard.
getFileSystem().getMetadataStore().forgetMetadata(
new Path(directory, "OnS3"));
}
/** Create a separate S3AFileSystem instance for testing. */
private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard,
boolean authoritativeMeta) throws IOException {
Configuration conf;
// Create a FileSystem that is S3-backed only
conf = createConfiguration();
S3ATestUtils.disableFilesystemCaching(conf);
if (disableS3Guard) {
conf.set(Constants.S3_METADATA_STORE_IMPL,
Constants.S3GUARD_METASTORE_NULL);
} else {
S3ATestUtils.maybeEnableS3Guard(conf);
conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta);
}
FileSystem fs = FileSystem.get(fsURI, conf);
return asS3AFS(fs);
}
private static S3AFileSystem asS3AFS(FileSystem fs) {
assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem);
return (S3AFileSystem)fs;
}
private static void assertPathDoesntExist(FileSystem fs, Path p)
throws IOException {
try {
FileStatus s = fs.getFileStatus(p);
} catch (FileNotFoundException e) {
return;
}
fail("Path should not exist: " + p);
}
}
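Write-back only pays off once the authoritative flag is set, which is what createTestFS() toggles above. A minimal sketch of the equivalent client configuration, assuming the in-memory LocalMetadataStore and substituting a placeholder class name and bucket URI:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
public class S3GuardAuthoritativeSketch {
  public static FileSystem openGuardedFileSystem() throws Exception {
    Configuration conf = new Configuration();
    // Route metadata queries through the in-memory LocalMetadataStore.
    conf.set("fs.s3a.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore");
    // Allow fully authoritative listings once they have been written back.
    conf.setBoolean("fs.s3a.metadatastore.authoritative", true);
    // Placeholder bucket; substitute a real test bucket.
    return FileSystem.newInstance(URI.create("s3a://example-bucket/"), conf);
  }
}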

View File

@ -23,6 +23,7 @@ import static org.mockito.Mockito.*;
import java.net.URI;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.Region;
/**
* An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3}
@ -35,6 +36,8 @@ public class MockS3ClientFactory implements S3ClientFactory {
String bucket = name.getHost();
AmazonS3 s3 = mock(AmazonS3.class);
when(s3.doesBucketExist(bucket)).thenReturn(true);
when(s3.getBucketLocation(anyString()))
.thenReturn(Region.US_West.toString());
return s3;
}
}

View File

@ -134,6 +134,18 @@ public interface S3ATestConstants {
String TEST_STS_ENABLED = "test.fs.s3a.sts.enabled";
String TEST_STS_ENDPOINT = "test.fs.s3a.sts.endpoint";
/**
* Various S3Guard tests.
*/
String TEST_S3GUARD_PREFIX = "fs.s3a.s3guard.test";
String TEST_S3GUARD_ENABLED = TEST_S3GUARD_PREFIX + ".enabled";
String TEST_S3GUARD_AUTHORITATIVE = TEST_S3GUARD_PREFIX + ".authoritative";
String TEST_S3GUARD_IMPLEMENTATION = TEST_S3GUARD_PREFIX + ".implementation";
String TEST_S3GUARD_IMPLEMENTATION_LOCAL = "local";
String TEST_S3GUARD_IMPLEMENTATION_DYNAMO = "dynamo";
String TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL = "dynamodblocal";
String TEST_S3GUARD_IMPLEMENTATION_NONE = "none";
/**
* Timeout in Milliseconds for standard tests: {@value}.
*/

View File

@ -22,7 +22,14 @@ import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBLocalClientFactory;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
import org.hamcrest.core.Is;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.internal.AssumptionViolatedException;
@ -31,11 +38,13 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
import static org.junit.Assert.*;
/**
@ -51,6 +60,15 @@ public final class S3ATestUtils {
*/
public static final String UNSET_PROPERTY = "unset";
/**
* Get S3A FS name.
* @param conf configuration.
* @return S3A fs name.
*/
public static String getFsName(Configuration conf) {
return conf.getTrimmed(TEST_FS_S3A_NAME, "");
}
/**
* Create the test filesystem.
*
@ -97,6 +115,8 @@ public final class S3ATestUtils {
throw new AssumptionViolatedException(
"No test filesystem in " + TEST_FS_S3A_NAME);
}
// patch in S3Guard options
maybeEnableS3Guard(conf);
S3AFileSystem fs1 = new S3AFileSystem();
//enable purging in tests
if (purge) {
@ -137,6 +157,8 @@ public final class S3ATestUtils {
throw new AssumptionViolatedException("No test filesystem in "
+ TEST_FS_S3A_NAME);
}
// patch in S3Guard options
maybeEnableS3Guard(conf);
FileContext fc = FileContext.getFileContext(testURI, conf);
return fc;
}
@ -301,12 +323,95 @@ public final class S3ATestUtils {
* @return a path
*/
public static Path createTestPath(Path defVal) {
String testUniqueForkId = System.getProperty(
S3ATestConstants.TEST_UNIQUE_FORK_ID);
String testUniqueForkId =
System.getProperty(S3ATestConstants.TEST_UNIQUE_FORK_ID);
return testUniqueForkId == null ? defVal :
new Path("/" + testUniqueForkId, "test");
}
/**
* Test assumption that S3Guard is/is not enabled.
* @param shouldBeEnabled should S3Guard be enabled?
* @param originalConf configuration to check
* @throws URISyntaxException
*/
public static void assumeS3GuardState(boolean shouldBeEnabled,
Configuration originalConf) throws URISyntaxException {
boolean isEnabled = getTestPropertyBool(originalConf, TEST_S3GUARD_ENABLED,
originalConf.getBoolean(TEST_S3GUARD_ENABLED, false));
Assume.assumeThat("Unexpected S3Guard test state:"
+ " shouldBeEnabled=" + shouldBeEnabled
+ " and isEnabled=" + isEnabled,
shouldBeEnabled, Is.is(isEnabled));
final String fsname = originalConf.getTrimmed(TEST_FS_S3A_NAME);
Assume.assumeNotNull(fsname);
final String bucket = new URI(fsname).getHost();
final Configuration conf = propagateBucketOptions(originalConf, bucket);
boolean usingNullImpl = S3GUARD_METASTORE_NULL.equals(
conf.getTrimmed(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL));
Assume.assumeThat("Unexpected S3Guard test state:"
+ " shouldBeEnabled=" + shouldBeEnabled
+ " but usingNullImpl=" + usingNullImpl,
shouldBeEnabled, Is.is(!usingNullImpl));
}
/**
* Conditionally set the S3Guard options from test properties.
* @param conf configuration
*/
public static void maybeEnableS3Guard(Configuration conf) {
if (getTestPropertyBool(conf, TEST_S3GUARD_ENABLED,
conf.getBoolean(TEST_S3GUARD_ENABLED, false))) {
// S3Guard is enabled.
boolean authoritative = getTestPropertyBool(conf,
TEST_S3GUARD_AUTHORITATIVE,
conf.getBoolean(TEST_S3GUARD_AUTHORITATIVE, true));
String impl = getTestProperty(conf, TEST_S3GUARD_IMPLEMENTATION,
conf.get(TEST_S3GUARD_IMPLEMENTATION,
TEST_S3GUARD_IMPLEMENTATION_LOCAL));
String implClass = "";
switch (impl) {
case TEST_S3GUARD_IMPLEMENTATION_LOCAL:
implClass = S3GUARD_METASTORE_LOCAL;
break;
case TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL:
conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
case TEST_S3GUARD_IMPLEMENTATION_DYNAMO:
implClass = S3GUARD_METASTORE_DYNAMO;
break;
case TEST_S3GUARD_IMPLEMENTATION_NONE:
implClass = S3GUARD_METASTORE_NULL;
break;
default:
fail("Unknown s3guard back end: \"" + impl + "\"");
}
LOG.debug("Enabling S3Guard, authoritative={}, implementation={}",
authoritative, implClass);
conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritative);
conf.set(S3_METADATA_STORE_IMPL, implClass);
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
}
}
/**
* Is there a MetadataStore configured for s3a with authoritative enabled?
* @param conf Configuration to test.
* @return true iff there is a MetadataStore configured, and it is
* configured to allow authoritative results. This can reduce
* round trips to the S3 service for cached results, which may affect FS/FC
* statistics.
*/
public static boolean isMetadataStoreAuthoritative(Configuration conf) {
if (conf == null) {
return Constants.DEFAULT_METADATASTORE_AUTHORITATIVE;
}
return conf.getBoolean(
Constants.METADATASTORE_AUTHORITATIVE,
Constants.DEFAULT_METADATASTORE_AUTHORITATIVE);
}
/**
* Reset all metrics in a list.
* @param metrics metrics to reset
@ -503,6 +608,94 @@ public final class S3ATestUtils {
private S3ATestUtils() {
}
/**
* Verify the core size, block size and timestamp values of a file.
* @param status status entry to check
* @param size file size
* @param blockSize block size
* @param modTime modified time
*/
public static void verifyFileStatus(FileStatus status, long size,
long blockSize, long modTime) {
verifyFileStatus(status, size, 0, modTime, 0, blockSize, null, null, null);
}
/**
* Verify the status entry of a file matches that expected.
* @param status status entry to check
* @param size file size
* @param replication replication factor (may be 0)
* @param modTime modified time
* @param accessTime access time (may be 0)
* @param blockSize block size
* @param owner owner (may be null)
* @param group user group (may be null)
* @param permission permission (may be null)
*/
public static void verifyFileStatus(FileStatus status,
long size,
int replication,
long modTime,
long accessTime,
long blockSize,
String owner,
String group,
FsPermission permission) {
String details = status.toString();
assertFalse("Not a dir: " + details, status.isDirectory());
assertEquals("Mod time: " + details, modTime, status.getModificationTime());
assertEquals("File size: " + details, size, status.getLen());
assertEquals("Block size: " + details, blockSize, status.getBlockSize());
if (replication > 0) {
assertEquals("Replication value: " + details, replication,
status.getReplication());
}
if (accessTime != 0) {
assertEquals("Access time: " + details, accessTime,
status.getAccessTime());
}
if (owner != null) {
assertEquals("Owner: " + details, owner, status.getOwner());
}
if (group != null) {
assertEquals("Group: " + details, group, status.getGroup());
}
if (permission != null) {
assertEquals("Permission: " + details, permission,
status.getPermission());
}
}
/**
* Verify the status entry of a directory matches that expected.
* @param status status entry to check
* @param replication replication factor
* @param modTime modified time
* @param accessTime access time
* @param owner owner
* @param group user group
* @param permission permission.
*/
public static void verifyDirStatus(FileStatus status,
int replication,
long modTime,
long accessTime,
String owner,
String group,
FsPermission permission) {
String details = status.toString();
assertTrue("Is a dir: " + details, status.isDirectory());
assertEquals("zero length: " + details, 0, status.getLen());
assertEquals("Mod time: " + details, modTime, status.getModificationTime());
assertEquals("Replication value: " + details, replication,
status.getReplication());
assertEquals("Access time: " + details, accessTime, status.getAccessTime());
assertEquals("Owner: " + details, owner, status.getOwner());
assertEquals("Group: " + details, group, status.getGroup());
assertEquals("Permission: " + details, permission, status.getPermission());
}
/**
* Set a bucket specific property to a particular value.
* If the generic key passed in has an {@code fs.s3a. prefix},

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;
import static org.apache.hadoop.fs.s3a.Listing.ACCEPT_ALL;
import static org.apache.hadoop.fs.s3a.Listing.ProvidedFileStatusIterator;
/**
* Place for the S3A listing classes; keeps all the small classes under control.
*/
public class TestListing extends AbstractS3AMockTest {
private static class MockRemoteIterator<FileStatus> implements
RemoteIterator<FileStatus> {
private Iterator<FileStatus> iterator;
MockRemoteIterator(Collection<FileStatus> source) {
iterator = source.iterator();
}
public boolean hasNext() {
return iterator.hasNext();
}
public FileStatus next() {
return iterator.next();
}
}
private FileStatus blankFileStatus(Path path) {
return new FileStatus(0, true, 0, 0, 0, path);
}
@Test
public void testTombstoneReconcilingIterator() throws Exception {
Path parent = new Path("/parent");
Path liveChild = new Path(parent, "/liveChild");
Path deletedChild = new Path(parent, "/deletedChild");
Path[] allFiles = {parent, liveChild, deletedChild};
Path[] liveFiles = {parent, liveChild};
Listing listing = new Listing(fs);
Collection<FileStatus> statuses = new ArrayList<>();
statuses.add(blankFileStatus(parent));
statuses.add(blankFileStatus(liveChild));
statuses.add(blankFileStatus(deletedChild));
Set<Path> tombstones = new HashSet<>();
tombstones.add(deletedChild);
RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
statuses);
RemoteIterator<LocatedFileStatus> locatedIterator =
listing.createLocatedFileStatusIterator(sourceIterator);
RemoteIterator<LocatedFileStatus> reconcilingIterator =
listing.createTombstoneReconcilingIterator(locatedIterator, tombstones);
Set<Path> expectedPaths = new HashSet<>();
expectedPaths.add(parent);
expectedPaths.add(liveChild);
Set<Path> actualPaths = new HashSet<>();
while (reconcilingIterator.hasNext()) {
actualPaths.add(reconcilingIterator.next().getPath());
}
Assert.assertTrue(actualPaths.equals(expectedPaths));
}
@Test
public void testProvidedFileStatusIteratorEnd() throws Exception {
FileStatus[] statuses = {
new FileStatus(100, false, 1, 8192, 0, new Path("s3a://blah/blah"))
};
ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses,
ACCEPT_ALL, new Listing.AcceptAllButS3nDirs());
Assert.assertTrue("hasNext() should return true first time", it.hasNext());
Assert.assertNotNull("first element should not be null", it.next());
Assert.assertFalse("hasNext() should now be false", it.hasNext());
try {
it.next();
Assert.fail("next() should have thrown exception");
} catch (NoSuchElementException e) {
// Correct behavior. Any other exceptions are propagated as failure.
return;
}
}
}

View File

@ -39,7 +39,9 @@ public class ITestS3AFileContextStatistics extends FCStatisticsBaseTest {
@After
public void tearDown() throws Exception {
if (fc != null) {
fc.delete(fileContextTestHelper.getTestRootPath(fc, "test"), true);
}
}
@Override

View File

@ -16,19 +16,29 @@ package org.apache.hadoop.fs.s3a.fileContext;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextURIBase;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestFileSystem;
/**
* S3a implementation of FileContextURIBase.
*/
public class ITestS3AFileContextURI extends FileContextURIBase {
private Configuration conf;
private boolean hasMetadataStore;
@Before
public void setUp() throws IOException, Exception {
conf = new Configuration();
try(S3AFileSystem s3aFS = createTestFileSystem(conf)) {
hasMetadataStore = s3aFS.hasMetadataStore();
}
fc1 = S3ATestUtils.createTestFileContext(conf);
fc2 = S3ATestUtils.createTestFileContext(conf); //different object, same FS
super.setUp();
@ -41,4 +51,11 @@ public class ITestS3AFileContextURI extends FileContextURIBase {
// (the statistics tested with this method are not relevant for an S3FS)
}
@Test
@Override
public void testModificationTime() throws IOException {
// skip modtime tests as there may be some inconsistency during creation
assume("modification time tests are skipped", !hasMetadataStore);
super.testModificationTime();
}
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
/**
* Test specification for MetadataStore contract tests. Supplies configuration
* and MetadataStore instance.
*/
public abstract class AbstractMSContract {
public abstract FileSystem getFileSystem() throws IOException;
public abstract MetadataStore getMetadataStore() throws IOException;
}
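
For illustration only (this sketch is not part of the commit): a concrete contract can be very small. The snippet below assumes that LocalMetadataStore from this patch and a local FileSystem are acceptable test fixtures; the class name is hypothetical and the extra imports (java.io.IOException, org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.FileSystem) are implied.

class LocalMSContractSketch extends AbstractMSContract {
  private final Configuration conf = new Configuration();

  @Override
  public FileSystem getFileSystem() throws IOException {
    // a local filesystem is enough for a store that never talks to S3
    return FileSystem.getLocal(conf);
  }

  @Override
  public MetadataStore getMetadataStore() throws IOException {
    return new LocalMetadataStore();
  }
}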

View File

@ -0,0 +1,161 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.io.IOUtils;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
/**
* Common functionality for S3GuardTool test cases.
*/
public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
protected static final String OWNER = "hdfs";
private MetadataStore ms;
protected static void expectResult(int expected,
String message,
S3GuardTool tool,
String... args) throws Exception {
assertEquals(message, expected, tool.run(args));
}
protected static void expectSuccess(
String message,
S3GuardTool tool,
String... args) throws Exception {
assertEquals(message, SUCCESS, tool.run(args));
}
protected MetadataStore getMetadataStore() {
return ms;
}
protected abstract MetadataStore newMetadataStore();
@Override
public void setup() throws Exception {
super.setup();
S3ATestUtils.assumeS3GuardState(true, getConfiguration());
ms = newMetadataStore();
ms.initialize(getFileSystem());
}
@Override
public void teardown() throws Exception {
super.teardown();
IOUtils.cleanupWithLogger(LOG, ms);
}
protected void mkdirs(Path path, boolean onS3, boolean onMetadataStore)
throws IOException {
if (onS3) {
getFileSystem().mkdirs(path);
}
if (onMetadataStore) {
S3AFileStatus status = new S3AFileStatus(true, path, OWNER);
ms.put(new PathMetadata(status));
}
}
protected static void putFile(MetadataStore ms, S3AFileStatus f)
throws IOException {
assertNotNull(f);
ms.put(new PathMetadata(f));
Path parent = f.getPath().getParent();
while (parent != null) {
S3AFileStatus dir = new S3AFileStatus(false, parent, f.getOwner());
ms.put(new PathMetadata(dir));
parent = parent.getParent();
}
}
/**
* Create file either on S3 or in metadata store.
* @param path the file path.
* @param onS3 set to true to create the file on S3.
* @param onMetadataStore set to true to create the file on the
* metadata store.
* @throws IOException IO problem
*/
protected void createFile(Path path, boolean onS3, boolean onMetadataStore)
throws IOException {
if (onS3) {
ContractTestUtils.touch(getFileSystem(), path);
}
if (onMetadataStore) {
S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(),
getFileSystem().qualify(path), 512L, "hdfs");
putFile(ms, status);
}
}
private void testPruneCommand(Configuration cmdConf, String...args)
throws Exception {
Path parent = path("prune-cli");
try {
getFileSystem().mkdirs(parent);
S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
cmd.setMetadataStore(ms);
createFile(new Path(parent, "stale"), true, true);
Thread.sleep(TimeUnit.SECONDS.toMillis(2));
createFile(new Path(parent, "fresh"), true, true);
assertEquals(2, ms.listChildren(parent).getListing().size());
expectSuccess("Prune command did not exit successfully - see output", cmd,
args);
assertEquals(1, ms.listChildren(parent).getListing().size());
} finally {
getFileSystem().delete(parent, true);
ms.prune(Long.MAX_VALUE);
}
}
@Test
public void testPruneCommandCLI() throws Exception {
String testPath = path("testPruneCommandCLI").toString();
testPruneCommand(getFileSystem().getConf(),
"prune", "-seconds", "1", testPath);
}
@Test
public void testPruneCommandConf() throws Exception {
getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
TimeUnit.SECONDS.toMillis(1));
String testPath = path("testPruneCommandConf").toString();
testPruneCommand(getConfiguration(), "prune", testPath);
}
}

View File

@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.File;
import java.io.IOException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.local.main.ServerRunner;
import com.amazonaws.services.dynamodbv2.local.server.DynamoDBProxyServer;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory;
import org.apache.hadoop.net.ServerSocketUtil;
import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory.DefaultDynamoDBClientFactory.getRegion;
/**
* A DynamoDBClientFactory implementation that creates AmazonDynamoDB clients
* against an in-memory DynamoDBLocal server instance.
*
* You will not be billed for any DynamoDB requests issued against it. However,
* DynamoDBLocal is only a simulator of the DynamoDB web service, so its
* behaviour may lag behind or differ from the real service; for example,
* throttling is not yet supported in DynamoDBLocal. This is for testing only.
*
* To use this for creating DynamoDB client in tests:
* <ol>
* <li>
* Like all DynamoDBClientFactory implementations, this factory is selected via
* configuration; a hedged configuration sketch follows the end of this class.
* </li>
* <li>
* The singleton DynamoDBLocal server instance is started automatically when
* creating the AmazonDynamoDB client for the first time. It is still worth
* launching the server before all the tests so that any startup error fails fast.
* </li>
* <li>
* The server can be stopped explicitly, which is not actually needed in
* tests as JVM termination will do that.
* </li>
* </ol>
*
* @see DefaultDynamoDBClientFactory
*/
public class DynamoDBLocalClientFactory extends Configured
implements DynamoDBClientFactory {
/** The DynamoDBLocal dynamoDBLocalServer instance for testing. */
private static DynamoDBProxyServer dynamoDBLocalServer;
private static String ddbEndpoint;
private static final String SYSPROP_SQLITE_LIB = "sqlite4java.library.path";
@Override
public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
throws IOException {
startSingletonServer();
final Configuration conf = getConf();
final AWSCredentialsProvider credentials =
createAWSCredentialProviderSet(null, conf);
final ClientConfiguration awsConf =
DefaultS3ClientFactory.createAwsConf(conf);
// fail fast in case of service errors
awsConf.setMaxErrorRetry(3);
final String region = getRegion(conf, defaultRegion);
LOG.info("Creating DynamoDBLocal client using endpoint {} in region {}",
ddbEndpoint, region);
return AmazonDynamoDBClientBuilder.standard()
.withCredentials(credentials)
.withClientConfiguration(awsConf)
.withEndpointConfiguration(
new AwsClientBuilder.EndpointConfiguration(ddbEndpoint, region))
.build();
}
/**
* Start a singleton in-memory DynamoDBLocal server if not started yet.
* @throws IOException if any error occurs
*/
public synchronized static void startSingletonServer() throws IOException {
if (dynamoDBLocalServer != null) {
return;
}
// Set this property if it has not been set elsewhere
if (StringUtils.isEmpty(System.getProperty(SYSPROP_SQLITE_LIB))) {
String projectBuildDir = System.getProperty("project.build.directory");
if (StringUtils.isEmpty(projectBuildDir)) {
projectBuildDir = "target";
}
// sqlite4java lib should have been copied to $projectBuildDir/native-libs
System.setProperty(SYSPROP_SQLITE_LIB,
projectBuildDir + File.separator + "native-libs");
LOG.info("Setting {} -> {}",
SYSPROP_SQLITE_LIB, System.getProperty(SYSPROP_SQLITE_LIB));
}
try {
// Start an in-memory local DynamoDB instance
final String port = String.valueOf(ServerSocketUtil.getPort(0, 100));
ddbEndpoint = "http://localhost:" + port;
dynamoDBLocalServer = ServerRunner.createServerFromCommandLineArgs(
new String[]{"-inMemory", "-port", port});
dynamoDBLocalServer.start();
LOG.info("DynamoDBLocal singleton server was started at {}", ddbEndpoint);
} catch (Exception t) {
String msg = "Error starting DynamoDBLocal server at " + ddbEndpoint
+ " " + t;
LOG.error(msg, t);
throw new IOException(msg, t);
}
}
/**
* Stop the in-memory DynamoDBLocal server if it is started.
* @throws IOException if any error occurs
*/
public synchronized static void stopSingletonServer() throws IOException {
if (dynamoDBLocalServer != null) {
LOG.info("Shutting down the in-memory DynamoDBLocal server");
try {
dynamoDBLocalServer.stop();
} catch (Throwable t) {
String msg = "Error stopping DynamoDBLocal server at " + ddbEndpoint;
LOG.error(msg, t);
throw new IOException(msg, t);
}
}
}
}
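
A hedged illustration, not part of this diff: a test could route S3Guard's DynamoDB client creation to this local factory purely through configuration. The helper name below is hypothetical, and it assumes the factory is selected via the fs.s3a.s3guard.ddb.client.factory.impl key; the metadata store keys are the same Constants used elsewhere in this patch.

/** Hypothetical helper: build a Configuration wired to DynamoDBLocal. */
static Configuration localDynamoDBConf() {
  Configuration conf = new Configuration();
  // assumed key name (Constants.S3GUARD_DDB_CLIENT_FACTORY_IMPL)
  conf.setClass("fs.s3a.s3guard.ddb.client.factory.impl",
      DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
  // use the DynamoDB metadata store, as the concurrency tests below do
  conf.set(Constants.S3_METADATA_STORE_IMPL,
      Constants.S3GUARD_METASTORE_DYNAMO);
  return conf;
}

With such a Configuration, DynamoDBMetadataStore.initialize() would create its AmazonDynamoDB client against the in-memory server started by this factory, so no real AWS resources are touched.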

View File

@ -0,0 +1,160 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.Constants;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
/**
* Tests concurrent operations on S3Guard.
*/
public class ITestS3GuardConcurrentOps extends AbstractS3ATestBase {
@Rule
public final Timeout timeout = new Timeout(5 * 60 * 1000);
private void failIfTableExists(DynamoDB db, String tableName) {
boolean tableExists = true;
try {
Table table = db.getTable(tableName);
table.describe();
} catch (ResourceNotFoundException e) {
tableExists = false;
}
if (tableExists) {
fail("Table already exists: " + tableName);
}
}
private void deleteTable(DynamoDB db, String tableName) throws
InterruptedException {
try {
Table table = db.getTable(tableName);
table.waitForActive();
table.delete();
table.waitForDelete();
} catch (ResourceNotFoundException e) {
LOG.warn("Failed to delete {}, as it was not found", tableName, e);
}
}
@Test
public void testConcurrentTableCreations() throws Exception {
final Configuration conf = getConfiguration();
Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard",
conf.get(Constants.S3_METADATA_STORE_IMPL).equals(
Constants.S3GUARD_METASTORE_DYNAMO));
DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
ms.initialize(getFileSystem());
DynamoDB db = ms.getDynamoDB();
String tableName = "testConcurrentTableCreations" + new Random().nextInt();
conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true);
conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, tableName);
String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
if (StringUtils.isEmpty(region)) {
// no region set, so pick it up from the test bucket
conf.set(S3GUARD_DDB_REGION_KEY, getFileSystem().getBucketLocation());
}
int concurrentOps = 16;
int iterations = 4;
failIfTableExists(db, tableName);
for (int i = 0; i < iterations; i++) {
ExecutorService executor = Executors.newFixedThreadPool(
concurrentOps, new ThreadFactory() {
private AtomicInteger count = new AtomicInteger(0);
public Thread newThread(Runnable r) {
return new Thread(r,
"testConcurrentTableCreations" + count.getAndIncrement());
}
});
((ThreadPoolExecutor) executor).prestartAllCoreThreads();
Future<Exception>[] futures = new Future[concurrentOps];
for (int f = 0; f < concurrentOps; f++) {
final int index = f;
futures[f] = executor.submit(new Callable<Exception>() {
@Override
public Exception call() throws Exception {
ContractTestUtils.NanoTimer timer =
new ContractTestUtils.NanoTimer();
Exception result = null;
try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) {
store.initialize(conf);
} catch (Exception e) {
LOG.error(e.getClass() + ": " + e.getMessage());
result = e;
}
timer.end("Parallel DynamoDB client creation %d", index);
LOG.info("Parallel DynamoDB client creation {} ran from {} to {}",
index, timer.getStartTime(), timer.getEndTime());
return result;
}
});
}
List<Exception> exceptions = new ArrayList<>(concurrentOps);
for (int f = 0; f < concurrentOps; f++) {
Exception outcome = futures[f].get();
if (outcome != null) {
exceptions.add(outcome);
}
}
deleteTable(db, tableName);
int exceptionsThrown = exceptions.size();
if (exceptionsThrown > 0) {
// at least one exception was thrown. Fail the test & nest the first
// exception caught
throw new AssertionError(exceptionsThrown + "/" + concurrentOps +
" threads threw exceptions while initializing on iteration " + i,
exceptions.get(0));
}
}
}
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.Random;
import java.util.concurrent.Callable;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import org.junit.Test;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init;
import org.apache.hadoop.test.LambdaTestUtils;
/**
* Test S3Guard related CLI commands against DynamoDB.
*/
public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
@Override
protected MetadataStore newMetadataStore() {
return new DynamoDBMetadataStore();
}
// Check the existence of a given DynamoDB table.
private static boolean exist(DynamoDB dynamoDB, String tableName) {
assertNotNull(dynamoDB);
assertNotNull(tableName);
assertFalse("empty table name", tableName.isEmpty());
try {
Table table = dynamoDB.getTable(tableName);
table.describe();
} catch (ResourceNotFoundException e) {
return false;
}
return true;
}
@Test
public void testInvalidRegion() throws Exception {
final String testTableName = "testInvalidRegion" + new Random().nextInt();
final String testRegion = "invalidRegion";
// Initialize MetadataStore
final Init initCmd = new Init(getFileSystem().getConf());
LambdaTestUtils.intercept(IOException.class,
new Callable<String>() {
@Override
public String call() throws Exception {
int res = initCmd.run(new String[]{
"init",
"-region", testRegion,
"-meta", "dynamodb://" + testTableName
});
return "Use of invalid region did not fail, returning " + res
+ "- table may have been " +
"created and not cleaned up: " + testTableName;
}
});
}
@Test
public void testDynamoDBInitDestroyCycle() throws Exception {
String testTableName = "testDynamoDBInitDestroy" + new Random().nextInt();
String testS3Url = path(testTableName).toString();
S3AFileSystem fs = getFileSystem();
DynamoDB db = null;
try {
// Initialize MetadataStore
Init initCmd = new Init(fs.getConf());
expectSuccess("Init command did not exit successfully - see output",
initCmd,
"init", "-meta", "dynamodb://" + testTableName, testS3Url);
// Verify it exists
MetadataStore ms = getMetadataStore();
assertTrue("metadata store should be DynamoDBMetadataStore",
ms instanceof DynamoDBMetadataStore);
DynamoDBMetadataStore dynamoMs = (DynamoDBMetadataStore) ms;
db = dynamoMs.getDynamoDB();
assertTrue(String.format("%s does not exist", testTableName),
exist(db, testTableName));
// Destroy MetadataStore
Destroy destroyCmd = new Destroy(fs.getConf());
expectSuccess("Destroy command did not exit successfully - see output",
destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
// Verify it does not exist
assertFalse(String.format("%s still exists", testTableName),
exist(db, testTableName));
// delete again and expect success again
expectSuccess("Destroy command did not exit successfully - see output",
destroyCmd,
"destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
} catch (ResourceNotFoundException e) {
throw new AssertionError(
String.format("DynamoDB table %s does not exist", testTableName),
e);
} finally {
LOG.warn("Table may have not been cleaned up: " +
testTableName);
if (db != null) {
Table table = db.getTable(testTableName);
if (table != null) {
try {
table.delete();
table.waitForDelete();
} catch (ResourceNotFoundException e) { /* Ignore */ }
}
}
}
}
}

View File

@ -0,0 +1,149 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Diff;
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
/**
* Test S3Guard related CLI commands against a LocalMetadataStore.
*/
public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {
@Override
protected MetadataStore newMetadataStore() {
return new LocalMetadataStore();
}
@Test
public void testImportCommand() throws Exception {
S3AFileSystem fs = getFileSystem();
MetadataStore ms = getMetadataStore();
Path parent = path("test-import");
fs.mkdirs(parent);
Path dir = new Path(parent, "a");
fs.mkdirs(dir);
Path emptyDir = new Path(parent, "emptyDir");
fs.mkdirs(emptyDir);
for (int i = 0; i < 10; i++) {
String child = String.format("file-%d", i);
try (FSDataOutputStream out = fs.create(new Path(dir, child))) {
out.write(1);
}
}
S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
cmd.setStore(ms);
expectSuccess("Import command did not exit successfully - see output",
cmd,
"import", parent.toString());
DirListingMetadata children =
ms.listChildren(dir);
assertEquals("Unexpected number of paths imported", 10, children
.getListing().size());
assertEquals("Expected 2 items: empty directory and a parent directory", 2,
ms.listChildren(parent).getListing().size());
// assertTrue(children.isAuthoritative());
}
@Test
public void testDiffCommand() throws IOException {
S3AFileSystem fs = getFileSystem();
MetadataStore ms = getMetadataStore();
Set<Path> filesOnS3 = new HashSet<>(); // files on S3.
Set<Path> filesOnMS = new HashSet<>(); // files on metadata store.
Path testPath = path("test-diff");
mkdirs(testPath, true, true);
Path msOnlyPath = new Path(testPath, "ms_only");
mkdirs(msOnlyPath, false, true);
filesOnMS.add(msOnlyPath);
for (int i = 0; i < 5; i++) {
Path file = new Path(msOnlyPath, String.format("file-%d", i));
createFile(file, false, true);
filesOnMS.add(file);
}
Path s3OnlyPath = new Path(testPath, "s3_only");
mkdirs(s3OnlyPath, true, false);
filesOnS3.add(s3OnlyPath);
for (int i = 0; i < 5; i++) {
Path file = new Path(s3OnlyPath, String.format("file-%d", i));
createFile(file, true, false);
filesOnS3.add(file);
}
ByteArrayOutputStream buf = new ByteArrayOutputStream();
PrintStream out = new PrintStream(buf);
Diff cmd = new Diff(fs.getConf());
cmd.setStore(ms);
assertEquals("Diff command did not exit successfully - see output", SUCCESS,
cmd.run(new String[]{"diff", "-meta", "local://metadata",
testPath.toString()}, out));
out.close();
Set<Path> actualOnS3 = new HashSet<>();
Set<Path> actualOnMS = new HashSet<>();
boolean duplicates = false;
try (BufferedReader reader =
new BufferedReader(new InputStreamReader(
new ByteArrayInputStream(buf.toByteArray())))) {
String line;
while ((line = reader.readLine()) != null) {
String[] fields = line.split("\\s");
assertEquals("[" + line + "] does not have enough fields",
4, fields.length);
String where = fields[0];
Path path = new Path(fields[3]);
if (Diff.S3_PREFIX.equals(where)) {
duplicates = duplicates || actualOnS3.contains(path);
actualOnS3.add(path);
} else if (Diff.MS_PREFIX.equals(where)) {
duplicates = duplicates || actualOnMS.contains(path);
actualOnMS.add(path);
} else {
fail("Unknown prefix: " + where);
}
}
}
String actualOut = out.toString();
assertEquals("Mismatched metadata store outputs: " + actualOut,
filesOnMS, actualOnMS);
assertEquals("Mismatched s3 outputs: " + actualOut, filesOnS3, actualOnS3);
assertFalse("Diff contained duplicates", duplicates);
}
}

View File

@ -0,0 +1,887 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import com.google.common.collect.Sets;
import org.junit.After;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.io.IOUtils;
/**
* Main test class for MetadataStore implementations.
* Implementations should each create a test by subclassing this and
* overriding {@link #createContract()}; a hedged sketch of such a subclass
* follows the end of this class.
* If your implementation may return missing results for recently set paths,
* override {@link MetadataStoreTestBase#allowMissing()}.
*/
public abstract class MetadataStoreTestBase extends Assert {
private static final Logger LOG =
LoggerFactory.getLogger(MetadataStoreTestBase.class);
/** Some dummy values for sanity-checking FileStatus contents. */
static final long BLOCK_SIZE = 32 * 1024 * 1024;
static final int REPLICATION = 1;
static final FsPermission PERMISSION = new FsPermission((short)0755);
static final String OWNER = "bob";
static final String GROUP = "uncles";
private final long accessTime = System.currentTimeMillis();
private final long modTime = accessTime - 5000;
/**
* Each test should override this. Will use a new Configuration instance.
* @return Contract which specifies the MetadataStore under test plus config.
*/
public abstract AbstractMSContract createContract() throws IOException;
/**
* Each test should override this.
* @param conf Base configuration instance to use.
* @return Contract which specifies the MetadataStore under test plus config.
*/
public abstract AbstractMSContract createContract(Configuration conf)
throws IOException;
/**
* Tests assume that implementations will return recently set results. If
* your implementation does not always hold onto metadata (e.g. LRU or
* time-based expiry) you can override this to return true.
* @return true if the test should succeed when null results are returned
* from the MetadataStore under test.
*/
public boolean allowMissing() {
return false;
}
/**
* Pruning is an optional feature for metadata store implementations.
* Tests will only check that functionality if it is expected to work.
* @return true if the test should expect pruning to work.
*/
public boolean supportsPruning() {
return true;
}
/** The MetadataStore contract used to test against. */
private AbstractMSContract contract;
private MetadataStore ms;
/**
* @return reference to the test contract.
*/
protected AbstractMSContract getContract() {
return contract;
}
@Before
public void setUp() throws Exception {
LOG.debug("== Setup. ==");
contract = createContract();
ms = contract.getMetadataStore();
assertNotNull("null MetadataStore", ms);
assertNotNull("null FileSystem", contract.getFileSystem());
ms.initialize(contract.getFileSystem());
}
@After
public void tearDown() throws Exception {
LOG.debug("== Tear down. ==");
if (ms != null) {
try {
ms.destroy();
} catch (Exception e) {
LOG.warn("Failed to destroy tables in teardown", e);
}
IOUtils.closeStream(ms);
ms = null;
}
}
/**
* Helper function for verifying DescendantsIterator and
* MetadataStoreListFilesIterator behavior.
* @param createNodes List of paths to create
* @param checkNodes List of paths that the iterator should return
*/
private void doTestDescendantsIterator(
Class implementation, String[] createNodes,
String[] checkNodes) throws Exception {
// we set up the example file system tree in metadata store
for (String pathStr : createNodes) {
final FileStatus status = pathStr.contains("file")
? basicFileStatus(strToPath(pathStr), 100, false)
: basicFileStatus(strToPath(pathStr), 0, true);
ms.put(new PathMetadata(status));
}
final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
RemoteIterator<FileStatus> iterator;
if (implementation == DescendantsIterator.class) {
iterator = new DescendantsIterator(ms, rootMeta);
} else if (implementation == MetadataStoreListFilesIterator.class) {
iterator = new MetadataStoreListFilesIterator(ms, rootMeta, false);
} else {
throw new UnsupportedOperationException("Unrecognized class");
}
final Set<String> actual = new HashSet<>();
while (iterator.hasNext()) {
final Path p = iterator.next().getPath();
actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
}
LOG.info("We got {} by iterating DescendantsIterator", actual);
if (!allowMissing()) {
assertEquals(Sets.newHashSet(checkNodes), actual);
}
}
/**
* Test that we can get the whole sub-tree by iterating DescendantsIterator.
*
* The tree is similar to, or the same as, the example in the code comment.
*/
@Test
public void testDescendantsIterator() throws Exception {
final String[] tree = new String[] {
"/dir1",
"/dir1/dir2",
"/dir1/dir3",
"/dir1/dir2/file1",
"/dir1/dir2/file2",
"/dir1/dir3/dir4",
"/dir1/dir3/dir5",
"/dir1/dir3/dir4/file3",
"/dir1/dir3/dir5/file4",
"/dir1/dir3/dir6"
};
doTestDescendantsIterator(DescendantsIterator.class,
tree, tree);
}
/**
* Test that we can get the correct subset of the tree with
* MetadataStoreListFilesIterator.
*
* The tree is similar to, or the same as, the example in the code comment.
*/
@Test
public void testMetadataStoreListFilesIterator() throws Exception {
final String[] wholeTree = new String[] {
"/dir1",
"/dir1/dir2",
"/dir1/dir3",
"/dir1/dir2/file1",
"/dir1/dir2/file2",
"/dir1/dir3/dir4",
"/dir1/dir3/dir5",
"/dir1/dir3/dir4/file3",
"/dir1/dir3/dir5/file4",
"/dir1/dir3/dir6"
};
final String[] leafNodes = new String[] {
"/dir1/dir2/file1",
"/dir1/dir2/file2",
"/dir1/dir3/dir4/file3",
"/dir1/dir3/dir5/file4"
};
doTestDescendantsIterator(MetadataStoreListFilesIterator.class, wholeTree,
leafNodes);
}
@Test
public void testPutNew() throws Exception {
/* create three dirs /da1, /da2, /da3 */
createNewDirs("/da1", "/da2", "/da3");
/* It is caller's responsibility to set up ancestor entries beyond the
* containing directory. We only track direct children of the directory.
* Thus this will not affect entry for /da1.
*/
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
assertEmptyDirs("/da2", "/da3");
assertDirectorySize("/da1/db1", 1);
/* Check contents of dir status. */
PathMetadata dirMeta = ms.get(strToPath("/da1"));
if (!allowMissing() || dirMeta != null) {
verifyDirStatus(dirMeta.getFileStatus());
}
/* This already exists, and should silently replace it. */
ms.put(new PathMetadata(makeDirStatus("/da1/db1")));
/* If we had putNew(), and used it above, this would be empty again. */
assertDirectorySize("/da1", 1);
assertEmptyDirs("/da2", "/da3");
/* Ensure new files update correct parent dirs. */
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc2", 200)));
assertDirectorySize("/da1", 1);
assertDirectorySize("/da1/db1", 2);
assertEmptyDirs("/da2", "/da3");
PathMetadata meta = ms.get(strToPath("/da1/db1/fc2"));
if (!allowMissing() || meta != null) {
assertNotNull("Get file after put new.", meta);
verifyFileStatus(meta.getFileStatus(), 200);
}
}
@Test
public void testPutOverwrite() throws Exception {
final String filePath = "/a1/b1/c1/some_file";
final String dirPath = "/a1/b1/c1/d1";
ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
ms.put(new PathMetadata(makeDirStatus(dirPath)));
PathMetadata meta = ms.get(strToPath(filePath));
if (!allowMissing() || meta != null) {
verifyFileStatus(meta.getFileStatus(), 100);
}
ms.put(new PathMetadata(basicFileStatus(strToPath(filePath), 9999, false)));
meta = ms.get(strToPath(filePath));
if (!allowMissing() || meta != null) {
verifyFileStatus(meta.getFileStatus(), 9999);
}
}
@Test
public void testRootDirPutNew() throws Exception {
Path rootPath = strToPath("/");
ms.put(new PathMetadata(makeFileStatus("/file1", 100)));
DirListingMetadata dir = ms.listChildren(rootPath);
if (!allowMissing() || dir != null) {
assertNotNull("Root dir cached", dir);
assertFalse("Root not fully cached", dir.isAuthoritative());
assertNotNull("have root dir file listing", dir.getListing());
assertEquals("One file in root dir", 1, dir.getListing().size());
assertEquals("file1 in root dir", strToPath("/file1"),
dir.getListing().iterator().next().getFileStatus().getPath());
}
}
@Test
public void testDelete() throws Exception {
setUpDeleteTest();
ms.delete(strToPath("/ADirectory1/db1/file2"));
/* Ensure delete happened. */
assertDirectorySize("/ADirectory1/db1", 1);
PathMetadata meta = ms.get(strToPath("/ADirectory1/db1/file2"));
assertTrue("File deleted", meta == null || meta.isDeleted());
}
@Test
public void testDeleteSubtree() throws Exception {
deleteSubtreeHelper("");
}
@Test
public void testDeleteSubtreeHostPath() throws Exception {
deleteSubtreeHelper(contract.getFileSystem().getUri().toString());
}
private void deleteSubtreeHelper(String pathPrefix) throws Exception {
String p = pathPrefix;
setUpDeleteTest(p);
createNewDirs(p + "/ADirectory1/db1/dc1", p + "/ADirectory1/db1/dc1/dd1");
ms.put(new PathMetadata(
makeFileStatus(p + "/ADirectory1/db1/dc1/dd1/deepFile", 100)));
if (!allowMissing()) {
assertCached(p + "/ADirectory1/db1");
}
ms.deleteSubtree(strToPath(p + "/ADirectory1/db1/"));
assertEmptyDirectory(p + "/ADirectory1");
assertDeleted(p + "/ADirectory1/db1");
assertDeleted(p + "/ADirectory1/file1");
assertDeleted(p + "/ADirectory1/file2");
assertDeleted(p + "/ADirectory1/db1/dc1/dd1/deepFile");
assertEmptyDirectory(p + "/ADirectory2");
}
/*
* Some implementations might not support this. It was useful to test
* correctness of the LocalMetadataStore implementation, but feel free to
* override this to be a no-op.
*/
@Test
public void testDeleteRecursiveRoot() throws Exception {
setUpDeleteTest();
ms.deleteSubtree(strToPath("/"));
assertDeleted("/ADirectory1");
assertDeleted("/ADirectory2");
assertDeleted("/ADirectory2/db1");
assertDeleted("/ADirectory2/db1/file1");
assertDeleted("/ADirectory2/db1/file2");
}
@Test
public void testDeleteNonExisting() throws Exception {
// Path doesn't exist, but should silently succeed
ms.delete(strToPath("/bobs/your/uncle"));
// Ditto.
ms.deleteSubtree(strToPath("/internets"));
}
private void setUpDeleteTest() throws IOException {
setUpDeleteTest("");
}
private void setUpDeleteTest(String prefix) throws IOException {
createNewDirs(prefix + "/ADirectory1", prefix + "/ADirectory2",
prefix + "/ADirectory1/db1");
ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file1",
100)));
ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file2",
100)));
PathMetadata meta = ms.get(strToPath(prefix + "/ADirectory1/db1/file2"));
if (!allowMissing() || meta != null) {
assertNotNull("Found test file", meta);
assertDirectorySize(prefix + "/ADirectory1/db1", 2);
}
}
@Test
public void testGet() throws Exception {
final String filePath = "/a1/b1/c1/some_file";
final String dirPath = "/a1/b1/c1/d1";
ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
ms.put(new PathMetadata(makeDirStatus(dirPath)));
PathMetadata meta = ms.get(strToPath(filePath));
if (!allowMissing() || meta != null) {
assertNotNull("Get found file", meta);
verifyFileStatus(meta.getFileStatus(), 100);
}
if (!(ms instanceof NullMetadataStore)) {
ms.delete(strToPath(filePath));
meta = ms.get(strToPath(filePath));
assertTrue("Tombstone not left for deleted file", meta.isDeleted());
}
meta = ms.get(strToPath(dirPath));
if (!allowMissing() || meta != null) {
assertNotNull("Get found file (dir)", meta);
assertTrue("Found dir", meta.getFileStatus().isDirectory());
}
meta = ms.get(strToPath("/bollocks"));
assertNull("Don't get non-existent file", meta);
}
@Test
public void testGetEmptyDir() throws Exception {
final String dirPath = "/a1/b1/c1/d1";
// Creates /a1/b1/c1/d1 as an empty dir
setupListStatus();
// 1. Tell MetadataStore (MS) that there are zero children
putListStatusFiles(dirPath, true /* authoritative */
/* zero children */);
// 2. Request a file status for dir, including whether or not the dir
// is empty.
PathMetadata meta = ms.get(strToPath(dirPath), true);
// 3. Check that either (a) the MS doesn't track whether or not it is
// empty (which is allowed), or (b) the MS knows the dir is empty.
if (!allowMissing() || meta != null) {
assertNotNull("Get should find meta for dir", meta);
assertNotEquals("Dir is empty or unknown", Tristate.FALSE,
meta.isEmptyDirectory());
}
}
@Test
public void testGetNonEmptyDir() throws Exception {
final String dirPath = "/a1/b1/c1";
// Creates /a1/b1/c1 as an non-empty dir
setupListStatus();
// Request a file status for dir, including whether or not the dir
// is empty.
PathMetadata meta = ms.get(strToPath(dirPath), true);
// MetadataStore knows /a1/b1/c1 has at least one child. It is valid
// for it to answer either (a) UNKNOWN: the MS doesn't track whether
// or not the dir is empty, or (b) the MS knows the dir is non-empty.
if (!allowMissing() || meta != null) {
assertNotNull("Get should find meta for dir", meta);
assertNotEquals("Dir is non-empty or unknown", Tristate.TRUE,
meta.isEmptyDirectory());
}
}
@Test
public void testGetDirUnknownIfEmpty() throws Exception {
final String dirPath = "/a1/b1/c1/d1";
// 1. Create /a1/b1/c1/d1 as an empty dir, but do not tell MetadataStore
// (MS) whether or not it has any children.
setupListStatus();
// 2. Request a file status for dir, including whether or not the dir
// is empty.
PathMetadata meta = ms.get(strToPath(dirPath), true);
// 3. Assert MS reports isEmptyDir as UNKNOWN: We haven't told MS
// whether or not the directory has any children.
if (!allowMissing() || meta != null) {
assertNotNull("Get should find meta for dir", meta);
assertEquals("Dir empty is unknown", Tristate.UNKNOWN,
meta.isEmptyDirectory());
}
}
@Test
public void testListChildren() throws Exception {
setupListStatus();
DirListingMetadata dirMeta;
dirMeta = ms.listChildren(strToPath("/"));
if (!allowMissing()) {
assertNotNull(dirMeta);
/* Cache has no way of knowing it has all entries for root unless we
* specifically tell it via put() with
* DirListingMetadata.isAuthoritative = true */
assertFalse("Root dir is not cached, or partially cached",
dirMeta.isAuthoritative());
assertListingsEqual(dirMeta.getListing(), "/a1", "/a2");
}
dirMeta = ms.listChildren(strToPath("/a1"));
if (!allowMissing() || dirMeta != null) {
dirMeta = dirMeta.withoutTombstones();
assertListingsEqual(dirMeta.getListing(), "/a1/b1", "/a1/b2");
}
// TODO HADOOP-14756 instrument MetadataStore for asserting & testing
dirMeta = ms.listChildren(strToPath("/a1/b1"));
if (!allowMissing() || dirMeta != null) {
assertListingsEqual(dirMeta.getListing(), "/a1/b1/file1", "/a1/b1/file2",
"/a1/b1/c1");
}
}
@Test
public void testDirListingRoot() throws Exception {
commonTestPutListStatus("/");
}
@Test
public void testPutDirListing() throws Exception {
commonTestPutListStatus("/a");
}
@Test
public void testInvalidListChildren() throws Exception {
setupListStatus();
assertNull("missing path returns null",
ms.listChildren(strToPath("/a1/b1x")));
}
@Test
public void testMove() throws Exception {
// Create test dir structure
createNewDirs("/a1", "/a2", "/a3");
createNewDirs("/a1/b1", "/a1/b2");
putListStatusFiles("/a1/b1", false, "/a1/b1/file1", "/a1/b1/file2");
// Assert root listing as expected
Collection<PathMetadata> entries;
DirListingMetadata dirMeta = ms.listChildren(strToPath("/"));
if (!allowMissing() || dirMeta != null) {
dirMeta = dirMeta.withoutTombstones();
assertNotNull("Listing root", dirMeta);
entries = dirMeta.getListing();
assertListingsEqual(entries, "/a1", "/a2", "/a3");
}
// Assert src listing as expected
dirMeta = ms.listChildren(strToPath("/a1/b1"));
if (!allowMissing() || dirMeta != null) {
assertNotNull("Listing /a1/b1", dirMeta);
entries = dirMeta.getListing();
assertListingsEqual(entries, "/a1/b1/file1", "/a1/b1/file2");
}
// Do the move(): rename(/a1/b1, /b1)
Collection<Path> srcPaths = Arrays.asList(strToPath("/a1/b1"),
strToPath("/a1/b1/file1"), strToPath("/a1/b1/file2"));
ArrayList<PathMetadata> destMetas = new ArrayList<>();
destMetas.add(new PathMetadata(makeDirStatus("/b1")));
destMetas.add(new PathMetadata(makeFileStatus("/b1/file1", 100)));
destMetas.add(new PathMetadata(makeFileStatus("/b1/file2", 100)));
ms.move(srcPaths, destMetas);
// Assert src is no longer there
dirMeta = ms.listChildren(strToPath("/a1"));
if (!allowMissing() || dirMeta != null) {
assertNotNull("Listing /a1", dirMeta);
entries = dirMeta.withoutTombstones().getListing();
assertListingsEqual(entries, "/a1/b2");
}
PathMetadata meta = ms.get(strToPath("/a1/b1/file1"));
assertTrue("Src path deleted", meta == null || meta.isDeleted());
// Assert dest looks right
meta = ms.get(strToPath("/b1/file1"));
if (!allowMissing() || meta != null) {
assertNotNull("dest file not null", meta);
verifyFileStatus(meta.getFileStatus(), 100);
}
dirMeta = ms.listChildren(strToPath("/b1"));
if (!allowMissing() || dirMeta != null) {
assertNotNull("dest listing not null", dirMeta);
entries = dirMeta.getListing();
assertListingsEqual(entries, "/b1/file1", "/b1/file2");
}
}
/**
* Test that the MetadataStore differentiates between the same path in two
* different buckets.
*/
@Test
public void testMultiBucketPaths() throws Exception {
String p1 = "s3a://bucket-a/path1";
String p2 = "s3a://bucket-b/path2";
// Make sure we start out empty
PathMetadata meta = ms.get(new Path(p1));
assertNull("Path should not be present yet.", meta);
meta = ms.get(new Path(p2));
assertNull("Path2 should not be present yet.", meta);
// Put p1, assert p2 doesn't match
ms.put(new PathMetadata(makeFileStatus(p1, 100)));
meta = ms.get(new Path(p2));
assertNull("Path 2 should not match path 1.", meta);
// Make sure delete is correct as well
if (!allowMissing()) {
ms.delete(new Path(p2));
meta = ms.get(new Path(p1));
assertNotNull("Path should not have been deleted", meta);
}
ms.delete(new Path(p1));
}
@Test
public void testPruneFiles() throws Exception {
Assume.assumeTrue(supportsPruning());
createNewDirs("/pruneFiles");
long oldTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime,
oldTime)));
DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles"));
if (!allowMissing()) {
assertListingsEqual(ls2.getListing(), "/pruneFiles/old");
}
// It's possible for the Local implementation to get from /pruneFiles/old's
// modification time to here in under 1ms, causing it to not get pruned
Thread.sleep(1);
long cutoff = System.currentTimeMillis();
long newTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime,
newTime)));
DirListingMetadata ls;
ls = ms.listChildren(strToPath("/pruneFiles"));
if (!allowMissing()) {
assertListingsEqual(ls.getListing(), "/pruneFiles/new",
"/pruneFiles/old");
}
ms.prune(cutoff);
ls = ms.listChildren(strToPath("/pruneFiles"));
if (allowMissing()) {
assertDeleted("/pruneFiles/old");
} else {
assertListingsEqual(ls.getListing(), "/pruneFiles/new");
}
}
@Test
public void testPruneDirs() throws Exception {
Assume.assumeTrue(supportsPruning());
// We only test that files, not dirs, are removed during prune.
// We specifically allow directories to remain, as it is more robust
// for DynamoDBMetadataStore's prune() implementation: If a
// file was created in a directory while it was being pruned, it would
// violate the invariant that all ancestors of a file exist in the table.
createNewDirs("/pruneDirs/dir");
long oldTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file",
1, oldTime, oldTime)));
// It's possible for the Local implementation to get from the old
// modification time to here in under 1ms, causing it to not get pruned
Thread.sleep(1);
long cutoff = getTime();
ms.prune(cutoff);
assertDeleted("/pruneDirs/dir/file");
}
@Test
public void testPruneUnsetsAuthoritative() throws Exception {
String rootDir = "/unpruned-root-dir";
String grandparentDir = rootDir + "/pruned-grandparent-dir";
String parentDir = grandparentDir + "/pruned-parent-dir";
String staleFile = parentDir + "/stale-file";
String freshFile = rootDir + "/fresh-file";
String[] directories = {rootDir, grandparentDir, parentDir};
createNewDirs(rootDir, grandparentDir, parentDir);
long time = System.currentTimeMillis();
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)),
Tristate.FALSE, false));
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)),
Tristate.FALSE, false));
ms.prune(time);
DirListingMetadata listing;
for (String directory : directories) {
Path path = strToPath(directory);
if (ms.get(path) != null) {
listing = ms.listChildren(path);
assertFalse(listing.isAuthoritative());
}
}
}
/*
* Helper functions.
*/
/** Modifies paths input array and returns it. */
private String[] buildPathStrings(String parent, String... paths)
throws IOException {
for (int i = 0; i < paths.length; i++) {
Path p = new Path(strToPath(parent), paths[i]);
paths[i] = p.toString();
}
return paths;
}
private void commonTestPutListStatus(final String parent) throws IOException {
putListStatusFiles(parent, true, buildPathStrings(parent, "file1", "file2",
"file3"));
DirListingMetadata dirMeta = ms.listChildren(strToPath(parent));
if (!allowMissing() || dirMeta != null) {
dirMeta = dirMeta.withoutTombstones();
assertNotNull("list after putListStatus", dirMeta);
Collection<PathMetadata> entries = dirMeta.getListing();
assertNotNull("listStatus has entries", entries);
assertListingsEqual(entries,
buildPathStrings(parent, "file1", "file2", "file3"));
}
}
private void setupListStatus() throws IOException {
createNewDirs("/a1", "/a2", "/a1/b1", "/a1/b2", "/a1/b1/c1",
"/a1/b1/c1/d1");
ms.put(new PathMetadata(makeFileStatus("/a1/b1/file1", 100)));
ms.put(new PathMetadata(makeFileStatus("/a1/b1/file2", 100)));
}
private void assertListingsEqual(Collection<PathMetadata> listing,
String ...pathStrs) throws IOException {
Set<Path> a = new HashSet<>();
for (PathMetadata meta : listing) {
a.add(meta.getFileStatus().getPath());
}
Set<Path> b = new HashSet<>();
for (String ps : pathStrs) {
b.add(strToPath(ps));
}
assertEquals("Same set of files", b, a);
}
private void putListStatusFiles(String dirPath, boolean authoritative,
String... filenames) throws IOException {
ArrayList<PathMetadata> metas = new ArrayList<>(filenames.length);
for (String filename : filenames) {
metas.add(new PathMetadata(makeFileStatus(filename, 100)));
}
DirListingMetadata dirMeta =
new DirListingMetadata(strToPath(dirPath), metas, authoritative);
ms.put(dirMeta);
}
private void createNewDirs(String... dirs)
throws IOException {
for (String pathStr : dirs) {
ms.put(new PathMetadata(makeDirStatus(pathStr)));
}
}
private void assertDirectorySize(String pathStr, int size)
throws IOException {
DirListingMetadata dirMeta = ms.listChildren(strToPath(pathStr));
if (!allowMissing()) {
assertNotNull("Directory " + pathStr + " in cache", dirMeta);
}
if (!allowMissing() || dirMeta != null) {
dirMeta = dirMeta.withoutTombstones();
assertEquals("Number of entries in dir " + pathStr, size,
nonDeleted(dirMeta.getListing()).size());
}
}
/** @return only file statuses which are *not* marked deleted. */
private Collection<PathMetadata> nonDeleted(
Collection<PathMetadata> statuses) {
Collection<PathMetadata> currentStatuses = new ArrayList<>();
for (PathMetadata status : statuses) {
if (!status.isDeleted()) {
currentStatuses.add(status);
}
}
return currentStatuses;
}
private void assertDeleted(String pathStr) throws IOException {
Path path = strToPath(pathStr);
PathMetadata meta = ms.get(path);
boolean cached = meta != null && !meta.isDeleted();
assertFalse(pathStr + " should not be cached.", cached);
}
protected void assertCached(String pathStr) throws IOException {
Path path = strToPath(pathStr);
PathMetadata meta = ms.get(path);
boolean cached = meta != null && !meta.isDeleted();
assertTrue(pathStr + " should be cached.", cached);
}
/**
* Convenience to create a fully qualified Path from string.
*/
Path strToPath(String p) throws IOException {
final Path path = new Path(p);
assert path.isAbsolute();
return path.makeQualified(contract.getFileSystem().getUri(), null);
}
private void assertEmptyDirectory(String pathStr) throws IOException {
assertDirectorySize(pathStr, 0);
}
private void assertEmptyDirs(String ...dirs) throws IOException {
for (String pathStr : dirs) {
assertEmptyDirectory(pathStr);
}
}
FileStatus basicFileStatus(Path path, int size, boolean isDir) throws
IOException {
return basicFileStatus(path, size, isDir, modTime, accessTime);
}
FileStatus basicFileStatus(Path path, int size, boolean isDir,
long newModTime, long newAccessTime) throws IOException {
return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime,
newAccessTime, PERMISSION, OWNER, GROUP, path);
}
private FileStatus makeFileStatus(String pathStr, int size) throws
IOException {
return makeFileStatus(pathStr, size, modTime, accessTime);
}
private FileStatus makeFileStatus(String pathStr, int size, long newModTime,
long newAccessTime) throws IOException {
return basicFileStatus(strToPath(pathStr), size, false,
newModTime, newAccessTime);
}
void verifyFileStatus(FileStatus status, long size) {
S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime);
}
private FileStatus makeDirStatus(String pathStr) throws IOException {
return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime);
}
/**
* Verify the directory file status. Subclass may verify additional fields.
*/
void verifyDirStatus(FileStatus status) {
assertTrue("Is a dir", status.isDirectory());
assertEquals("zero length", 0, status.getLen());
}
long getModTime() {
return modTime;
}
long getAccessTime() {
return accessTime;
}
protected static long getTime() {
return System.currentTimeMillis();
}
}
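
A hedged sketch, not part of this commit, of how a MetadataStore implementation would plug into this base class. It reuses the hypothetical LocalMSContractSketch shown earlier in this document after AbstractMSContract; all names are illustrative.

class TestMyMetadataStoreSketch extends MetadataStoreTestBase {

  @Override
  public AbstractMSContract createContract() throws IOException {
    return createContract(new Configuration());
  }

  @Override
  public AbstractMSContract createContract(Configuration conf)
      throws IOException {
    // conf is ignored in this sketch; a real subclass would honour it
    return new LocalMSContractSketch();
  }

  @Override
  public boolean allowMissing() {
    // return true if the store may evict recently written entries (LRU/expiry)
    return false;
  }
}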

View File

@ -0,0 +1,303 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.junit.Assert.*;
/**
* Unit tests of {@link DirListingMetadata}.
*/
public class TestDirListingMetadata {
private static final String TEST_OWNER = "hadoop";
@Rule
public ExpectedException exception = ExpectedException.none();
@Test
public void testNullPath() {
exception.expect(NullPointerException.class);
exception.expectMessage(notNullValue(String.class));
new DirListingMetadata(null, null, false);
}
@Test
public void testNullListing() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertTrue(meta.getListing().isEmpty());
assertFalse(meta.isAuthoritative());
}
@Test
public void testEmptyListing() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path,
new ArrayList<PathMetadata>(0),
false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertTrue(meta.getListing().isEmpty());
assertFalse(meta.isAuthoritative());
}
@Test
public void testListing() {
Path path = new Path("/path");
PathMetadata pathMeta1 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertFalse(meta.getListing().isEmpty());
assertTrue(meta.getListing().contains(pathMeta1));
assertTrue(meta.getListing().contains(pathMeta2));
assertTrue(meta.getListing().contains(pathMeta3));
assertFalse(meta.isAuthoritative());
}
@Test
public void testListingUnmodifiable() {
Path path = new Path("/path");
DirListingMetadata meta = makeTwoDirsOneFile(path);
assertNotNull(meta.getListing());
exception.expect(UnsupportedOperationException.class);
meta.getListing().clear();
}
@Test
public void testAuthoritative() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, true);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertTrue(meta.getListing().isEmpty());
assertTrue(meta.isAuthoritative());
}
@Test
public void testSetAuthoritative() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertTrue(meta.getListing().isEmpty());
assertFalse(meta.isAuthoritative());
meta.setAuthoritative(true);
assertTrue(meta.isAuthoritative());
}
@Test
public void testGet() {
Path path = new Path("/path");
PathMetadata pathMeta1 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertFalse(meta.getListing().isEmpty());
assertTrue(meta.getListing().contains(pathMeta1));
assertTrue(meta.getListing().contains(pathMeta2));
assertTrue(meta.getListing().contains(pathMeta3));
assertFalse(meta.isAuthoritative());
assertEquals(pathMeta1, meta.get(pathMeta1.getFileStatus().getPath()));
assertEquals(pathMeta2, meta.get(pathMeta2.getFileStatus().getPath()));
assertEquals(pathMeta3, meta.get(pathMeta3.getFileStatus().getPath()));
assertNull(meta.get(new Path(path, "notfound")));
}
@Test
public void testGetNull() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(NullPointerException.class);
exception.expectMessage(notNullValue(String.class));
meta.get(null);
}
@Test
public void testGetRoot() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.get(new Path("/"));
}
@Test
public void testGetNotChild() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.get(new Path("/different/ancestor"));
}
@Test
public void testPut() {
Path path = new Path("/path");
PathMetadata pathMeta1 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertFalse(meta.getListing().isEmpty());
assertTrue(meta.getListing().contains(pathMeta1));
assertTrue(meta.getListing().contains(pathMeta2));
assertTrue(meta.getListing().contains(pathMeta3));
assertFalse(meta.isAuthoritative());
PathMetadata pathMeta4 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir3"), TEST_OWNER));
meta.put(pathMeta4.getFileStatus());
assertTrue(meta.getListing().contains(pathMeta4));
assertEquals(pathMeta4, meta.get(pathMeta4.getFileStatus().getPath()));
}
@Test
public void testPutNull() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(NullPointerException.class);
exception.expectMessage(notNullValue(String.class));
meta.put(null);
}
@Test
public void testPutNullPath() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(NullPointerException.class);
exception.expectMessage(notNullValue(String.class));
meta.put(new S3AFileStatus(true, null, TEST_OWNER));
}
@Test
public void testPutRoot() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.put(new S3AFileStatus(true, new Path("/"), TEST_OWNER));
}
@Test
public void testPutNotChild() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.put(new S3AFileStatus(true, new Path("/different/ancestor"),
TEST_OWNER));
}
@Test
public void testRemove() {
Path path = new Path("/path");
PathMetadata pathMeta1 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
assertNotNull(meta.getListing());
assertFalse(meta.getListing().isEmpty());
assertTrue(meta.getListing().contains(pathMeta1));
assertTrue(meta.getListing().contains(pathMeta2));
assertTrue(meta.getListing().contains(pathMeta3));
assertFalse(meta.isAuthoritative());
meta.remove(pathMeta1.getFileStatus().getPath());
assertFalse(meta.getListing().contains(pathMeta1));
assertNull(meta.get(pathMeta1.getFileStatus().getPath()));
}
@Test
public void testRemoveNull() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(NullPointerException.class);
exception.expectMessage(notNullValue(String.class));
meta.remove(null);
}
@Test
public void testRemoveRoot() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.remove(new Path("/"));
}
@Test
public void testRemoveNotChild() {
Path path = new Path("/path");
DirListingMetadata meta = new DirListingMetadata(path, null, false);
exception.expect(IllegalArgumentException.class);
exception.expectMessage(notNullValue(String.class));
meta.remove(new Path("/different/ancestor"));
}
/*
* Create DirListingMetadata with two dirs and one file living in directory
* 'parent'
*/
private static DirListingMetadata makeTwoDirsOneFile(Path parent) {
PathMetadata pathMeta1 = new PathMetadata(
new S3AFileStatus(true, new Path(parent, "dir1"), TEST_OWNER));
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192,
TEST_OWNER));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
return new DirListingMetadata(parent, listing, false);
}
}
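For readers skimming the new API, the calls exercised above reduce to a small surface: construct a DirListingMetadata from a path and a (possibly null) listing, then put(), get() and remove() children and toggle the authoritative flag. Below is a minimal usage sketch built only from constructors and methods that appear in this test class; the bucket name and owner are placeholders, and the sketch assumes these members are accessible outside the package, which the real code may restrict.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
public class DirListingMetadataSketch {
  public static void main(String[] args) {
    // Placeholder bucket and owner; these are not values from the patch.
    Path dir = new Path("s3a://example-bucket/data");
    // Seed the listing with one subdirectory and one file, as the tests do.
    PathMetadata subDir = new PathMetadata(
        new S3AFileStatus(true, new Path(dir, "dir1"), "hadoop"));
    PathMetadata file = new PathMetadata(
        new S3AFileStatus(123, 456, new Path(dir, "file1"), 8192, "hadoop"));
    List<PathMetadata> listing = Arrays.asList(subDir, file);
    // Non-authoritative listing: callers must still consult S3 for a full view.
    DirListingMetadata meta = new DirListingMetadata(dir, listing, false);
    // Add another child, look an entry up by path, and drop one.
    meta.put(new S3AFileStatus(true, new Path(dir, "dir2"), "hadoop"));
    PathMetadata found = meta.get(new Path(dir, "file1"));
    meta.remove(new Path(dir, "dir1"));
    // Mark the listing as a complete view of the directory.
    meta.setAuthoritative(true);
    System.out.println(meta.isAuthoritative() + " " + (found != null));
  }
}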

View File

@ -0,0 +1,594 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.MockS3ClientFactory;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ClientFactory;
import org.apache.hadoop.security.UserGroupInformation;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
import static org.apache.hadoop.test.LambdaTestUtils.*;
/**
* Test that {@link DynamoDBMetadataStore} implements {@link MetadataStore}.
*
* In this unit test, we use an in-memory DynamoDBLocal server instead of real
* AWS DynamoDB. An {@link S3AFileSystem} object is created and shared for
 * initializing {@link DynamoDBMetadataStore} objects. No real S3 requests are
 * issued, as the underlying AWS S3 client is mocked. You won't be billed for
 * AWS S3 or DynamoDB when you run this test.
 *
 * As defined in the base class, every test case gets an independent contract
 * that creates a new {@link DynamoDBMetadataStore} instance and initializes it.
 * A table is created for each test by the test contract, and is destroyed
 * after the test case finishes.
*/
public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
private static final Logger LOG =
LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
private static final String BUCKET = "TestDynamoDBMetadataStore";
private static final String S3URI =
URI.create(FS_S3A + "://" + BUCKET + "/").toString();
public static final PrimaryKey
VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
DynamoDBMetadataStore.VERSION_MARKER);
/** The DynamoDB instance that can issue requests directly to server. */
private static DynamoDB dynamoDB;
@Rule
public final Timeout timeout = new Timeout(60 * 1000);
/**
 * Start the in-memory DynamoDBLocal server and initialize the S3 file system.
*/
@BeforeClass
public static void setUpBeforeClass() throws Exception {
DynamoDBLocalClientFactory.startSingletonServer();
try {
dynamoDB = new DynamoDBMSContract().getMetadataStore().getDynamoDB();
} catch (AmazonServiceException e) {
final String msg = "Cannot initialize a DynamoDBMetadataStore instance "
+ "against the local DynamoDB server. Perhaps the DynamoDBLocal "
+ "server is not configured correctly. ";
LOG.error(msg, e);
// fail fast if the DynamoDBLocal server can not work
throw e;
}
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
if (dynamoDB != null) {
dynamoDB.shutdown();
}
DynamoDBLocalClientFactory.stopSingletonServer();
}
/**
* Each contract has its own S3AFileSystem and DynamoDBMetadataStore objects.
*/
private static class DynamoDBMSContract extends AbstractMSContract {
private final S3AFileSystem s3afs;
private final DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
DynamoDBMSContract() throws IOException {
this(new Configuration());
}
DynamoDBMSContract(Configuration conf) throws IOException {
// using mocked S3 clients
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
S3ClientFactory.class);
conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, S3URI);
// setting config for creating a DynamoDBClient against local server
conf.set(ACCESS_KEY, "dummy-access-key");
conf.set(SECRET_KEY, "dummy-secret-key");
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
// always create new file system object for a test contract
s3afs = (S3AFileSystem) FileSystem.newInstance(conf);
ms.initialize(s3afs);
}
@Override
public S3AFileSystem getFileSystem() {
return s3afs;
}
@Override
public DynamoDBMetadataStore getMetadataStore() {
return ms;
}
}
@Override
public DynamoDBMSContract createContract() throws IOException {
return new DynamoDBMSContract();
}
@Override
public DynamoDBMSContract createContract(Configuration conf) throws
IOException {
return new DynamoDBMSContract(conf);
}
@Override
FileStatus basicFileStatus(Path path, int size, boolean isDir)
throws IOException {
String owner = UserGroupInformation.getCurrentUser().getShortUserName();
return isDir
? new S3AFileStatus(true, path, owner)
: new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner);
}
private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException {
return (DynamoDBMetadataStore) getContract().getMetadataStore();
}
private S3AFileSystem getFileSystem() throws IOException {
return (S3AFileSystem) getContract().getFileSystem();
}
/**
 * This tests that, after initialize() using an S3AFileSystem object, the
 * instance has been initialized successfully and its table is ACTIVE.
*/
@Test
public void testInitialize() throws IOException {
final String tableName = "testInitializeWithFileSystem";
final S3AFileSystem s3afs = getFileSystem();
final Configuration conf = s3afs.getConf();
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(s3afs);
verifyTableInitialized(tableName);
assertNotNull(ddbms.getTable());
assertEquals(tableName, ddbms.getTable().getTableName());
String expectedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
s3afs.getBucketLocation(tableName));
assertEquals("DynamoDB table should be in configured region or the same" +
" region as S3 bucket",
expectedRegion,
ddbms.getRegion());
}
}
/**
 * This tests that, after initialize() using a Configuration object, the
 * instance has been initialized successfully and its table is ACTIVE.
*/
@Test
public void testInitializeWithConfiguration() throws IOException {
final String tableName = "testInitializeWithConfiguration";
final Configuration conf = getFileSystem().getConf();
conf.unset(S3GUARD_DDB_TABLE_NAME_KEY);
String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
getFileSystem().getBucketLocation());
conf.unset(S3GUARD_DDB_REGION_KEY);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(conf);
fail("Should have failed because the table name is not set!");
} catch (IllegalArgumentException ignored) {
}
// config table name
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(conf);
fail("Should have failed because as the region is not set!");
} catch (IllegalArgumentException ignored) {
}
// config region
conf.set(S3GUARD_DDB_REGION_KEY, savedRegion);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(conf);
verifyTableInitialized(tableName);
assertNotNull(ddbms.getTable());
assertEquals(tableName, ddbms.getTable().getTableName());
assertEquals("Unexpected key schema found!",
keySchema(),
ddbms.getTable().describe().getKeySchema());
}
}
/**
* Test that for a large batch write request, the limit is handled correctly.
*/
@Test
public void testBatchWrite() throws IOException {
final int[] numMetasToDeleteOrPut = {
-1, // null
0, // empty collection
1, // one path
S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT, // exact limit of a batch request
S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT + 1 // limit + 1
};
for (int numOldMetas : numMetasToDeleteOrPut) {
for (int numNewMetas : numMetasToDeleteOrPut) {
doTestBatchWrite(numOldMetas, numNewMetas);
}
}
}
private void doTestBatchWrite(int numDelete, int numPut) throws IOException {
final String root = S3URI + "/testBatchWrite_" + numDelete + '_' + numPut;
final Path oldDir = new Path(root, "oldDir");
final Path newDir = new Path(root, "newDir");
LOG.info("doTestBatchWrite: oldDir={}, newDir={}", oldDir, newDir);
DynamoDBMetadataStore ms = getDynamoMetadataStore();
ms.put(new PathMetadata(basicFileStatus(oldDir, 0, true)));
ms.put(new PathMetadata(basicFileStatus(newDir, 0, true)));
final List<PathMetadata> oldMetas =
numDelete < 0 ? null : new ArrayList<PathMetadata>(numDelete);
for (int i = 0; i < numDelete; i++) {
oldMetas.add(new PathMetadata(
basicFileStatus(new Path(oldDir, "child" + i), i, true)));
}
final List<PathMetadata> newMetas =
numPut < 0 ? null : new ArrayList<PathMetadata>(numPut);
for (int i = 0; i < numPut; i++) {
newMetas.add(new PathMetadata(
basicFileStatus(new Path(newDir, "child" + i), i, false)));
}
Collection<Path> pathsToDelete = null;
if (oldMetas != null) {
// put all metadata of old paths and verify
ms.put(new DirListingMetadata(oldDir, oldMetas, false));
assertEquals(0, ms.listChildren(newDir).withoutTombstones().numEntries());
assertTrue(CollectionUtils.isEqualCollection(oldMetas,
ms.listChildren(oldDir).getListing()));
pathsToDelete = new ArrayList<>(oldMetas.size());
for (PathMetadata meta : oldMetas) {
pathsToDelete.add(meta.getFileStatus().getPath());
}
}
// move the old paths to new paths and verify
ms.move(pathsToDelete, newMetas);
assertEquals(0, ms.listChildren(oldDir).withoutTombstones().numEntries());
if (newMetas != null) {
assertTrue(CollectionUtils.isEqualCollection(newMetas,
ms.listChildren(newDir).getListing()));
}
}
@Test
public void testInitExistingTable() throws IOException {
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
final String tableName = ddbms.getTable().getTableName();
verifyTableInitialized(tableName);
// create existing table
ddbms.initTable();
verifyTableInitialized(tableName);
}
/**
* Test the low level version check code.
*/
@Test
public void testItemVersionCompatibility() throws Throwable {
verifyVersionCompatibility("table",
createVersionMarker(VERSION_MARKER, VERSION, 0));
}
/**
* Test that a version marker entry without the version number field
* is rejected as incompatible with a meaningful error message.
*/
@Test
public void testItemLacksVersion() throws Throwable {
intercept(IOException.class, E_NOT_VERSION_MARKER,
new VoidCallable() {
@Override
public void call() throws Exception {
verifyVersionCompatibility("table",
new Item().withPrimaryKey(
createVersionMarkerPrimaryKey(VERSION_MARKER)));
}
});
}
/**
* Delete the version marker and verify that table init fails.
*/
@Test
public void testTableVersionRequired() throws Exception {
Configuration conf = getFileSystem().getConf();
int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES,
S3GUARD_DDB_MAX_RETRIES_DEFAULT);
conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3);
final DynamoDBMetadataStore ddbms = createContract(conf).getMetadataStore();
String tableName = conf.get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
Table table = verifyTableInitialized(tableName);
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
// create existing table
intercept(IOException.class, E_NO_VERSION_MARKER,
new VoidCallable() {
@Override
public void call() throws Exception {
ddbms.initTable();
}
});
conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries);
}
/**
* Set the version value to a different number and verify that
* table init fails.
*/
@Test
public void testTableVersionMismatch() throws Exception {
final DynamoDBMetadataStore ddbms = createContract().getMetadataStore();
String tableName = getFileSystem().getConf()
.get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
Table table = verifyTableInitialized(tableName);
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
Item v200 = createVersionMarker(VERSION_MARKER, 200, 0);
table.putItem(v200);
// create existing table
intercept(IOException.class, E_INCOMPATIBLE_VERSION,
new VoidCallable() {
@Override
public void call() throws Exception {
ddbms.initTable();
}
});
}
/**
* Test that initTable fails with IOException when table does not exist and
* table auto-creation is disabled.
*/
@Test
public void testFailNonexistentTable() throws IOException {
final String tableName = "testFailNonexistentTable";
final S3AFileSystem s3afs = getFileSystem();
final Configuration conf = s3afs.getConf();
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(s3afs);
fail("Should have failed as table does not exist and table auto-creation"
+ " is disabled");
} catch (IOException ignored) {
}
}
/**
* Test cases about root directory as it is not in the DynamoDB table.
*/
@Test
public void testRootDirectory() throws IOException {
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
Path rootPath = new Path(S3URI);
verifyRootDirectory(ddbms.get(rootPath), true);
ddbms.put(new PathMetadata(new S3AFileStatus(true,
new Path(rootPath, "foo"),
UserGroupInformation.getCurrentUser().getShortUserName())));
verifyRootDirectory(ddbms.get(new Path(S3URI)), false);
}
private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
assertNotNull(rootMeta);
final FileStatus status = rootMeta.getFileStatus();
assertNotNull(status);
assertTrue(status.isDirectory());
// UNKNOWN is always a valid option, but true / false should not contradict
if (isEmpty) {
assertNotSame("Should not be marked non-empty",
Tristate.FALSE,
rootMeta.isEmptyDirectory());
} else {
assertNotSame("Should not be marked empty",
Tristate.TRUE,
rootMeta.isEmptyDirectory());
}
}
/**
 * Test that when moving nested paths, all of their ancestors up to the
 * destination root are also created.
 * Here is the directory tree before the move:
 * <pre>
 * testMovePopulatesAncestors
 *   a/
 *     b/
 *       src/
 *         dir1/
 *           dir2/
 *         file1.txt
 *   c/
 *     d/
 *       e/
 *         dest/
 * </pre>
 * As part of rename(a/b/src, c/d/e/dest), S3A will enumerate the subtree at
 * a/b/src. This test verifies that after the move, the new subtree at
 * 'dest' is reachable from the root (i.e. /c, /c/d and /c/d/e exist in the
 * table). DynamoDBMetadataStore depends on this property to do recursive
 * delete without a full table scan.
*/
@Test
public void testMovePopulatesAncestors() throws IOException {
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
final String testRoot = "/testMovePopulatesAncestors";
final String srcRoot = testRoot + "/a/b/src";
final String destRoot = testRoot + "/c/d/e/dest";
final Path nestedPath1 = strToPath(srcRoot + "/file1.txt");
ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false)));
final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2");
ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true)));
// We don't put the destRoot path here, since put() would create ancestor
// entries, and we want to ensure that move() does it, instead.
// Build enumeration of src / dest paths and do the move()
final Collection<Path> fullSourcePaths = Lists.newArrayList(
strToPath(srcRoot),
strToPath(srcRoot + "/dir1"),
strToPath(srcRoot + "/dir1/dir2"),
strToPath(srcRoot + "/file1.txt")
);
final Collection<PathMetadata> pathsToCreate = Lists.newArrayList(
new PathMetadata(basicFileStatus(strToPath(destRoot),
0, true)),
new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"),
0, true)),
new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"),
0, true)),
new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"),
1024, false))
);
ddbms.move(fullSourcePaths, pathsToCreate);
    // assert that all the ancestors have been populated automatically
assertCached(testRoot + "/c");
assertCached(testRoot + "/c/d");
assertCached(testRoot + "/c/d/e");
assertCached(destRoot /* /c/d/e/dest */);
// Also check moved files while we're at it
assertCached(destRoot + "/dir1");
assertCached(destRoot + "/dir1/dir2");
assertCached(destRoot + "/file1.txt");
}
@Test
public void testProvisionTable() throws IOException {
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
final String tableName = ddbms.getTable().getTableName();
final ProvisionedThroughputDescription oldProvision =
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
oldProvision.getWriteCapacityUnits() * 2);
final ProvisionedThroughputDescription newProvision =
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
LOG.info("Old provision = {}, new provision = {}",
oldProvision, newProvision);
assertEquals(oldProvision.getReadCapacityUnits() * 2,
newProvision.getReadCapacityUnits().longValue());
assertEquals(oldProvision.getWriteCapacityUnits() * 2,
newProvision.getWriteCapacityUnits().longValue());
}
@Test
public void testDeleteTable() throws IOException {
final String tableName = "testDeleteTable";
final S3AFileSystem s3afs = getFileSystem();
final Configuration conf = s3afs.getConf();
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(s3afs);
// we can list the empty table
ddbms.listChildren(new Path(S3URI));
ddbms.destroy();
verifyTableNotExist(tableName);
      // delete the table once more; the ResourceNotFoundException is swallowed silently
ddbms.destroy();
verifyTableNotExist(tableName);
try {
// we can no longer list the destroyed table
ddbms.listChildren(new Path(S3URI));
fail("Should have failed after the table is destroyed!");
} catch (IOException ignored) {
}
}
}
/**
 * This validates that the table is created and ACTIVE in DynamoDB.
 *
 * This should not rely on the {@link DynamoDBMetadataStore} implementation.
 * @return the table.
*/
private static Table verifyTableInitialized(String tableName) {
final Table table = dynamoDB.getTable(tableName);
final TableDescription td = table.describe();
assertEquals(tableName, td.getTableName());
assertEquals("ACTIVE", td.getTableStatus());
return table;
}
/**
* This validates the table is not found in DynamoDB.
*
* This should not rely on the {@link DynamoDBMetadataStore} implementation.
*/
private static void verifyTableNotExist(String tableName) {
final Table table = dynamoDB.getTable(tableName);
try {
table.describe();
fail("Expecting ResourceNotFoundException for table '" + tableName + "'");
} catch (ResourceNotFoundException ignored) {
}
}
}
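Outside the mocked DynamoDBLocal setup above, the two initialization paths this class exercises are initialize(FileSystem) and initialize(Configuration). A hedged sketch of standing up a store follows, using only the configuration keys referenced in this test; the table name, region and bucket are placeholders, and running it for real requires valid AWS credentials.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore;
import static org.apache.hadoop.fs.s3a.Constants.*;
public class DynamoDBMetadataStoreSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder table, region and bucket; substitute real values to run.
    conf.set(S3GUARD_DDB_TABLE_NAME_KEY, "example-s3guard-table");
    conf.set(S3GUARD_DDB_REGION_KEY, "us-west-2");
    conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
    conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY,
        "s3a://example-bucket/");
    // Option 1: initialize from an S3A filesystem; the region then defaults
    // to the bucket location, as testInitialize() asserts.
    S3AFileSystem s3afs = (S3AFileSystem) FileSystem.newInstance(conf);
    try (DynamoDBMetadataStore ms = new DynamoDBMetadataStore()) {
      ms.initialize(s3afs);
      // the table is created (if permitted) and should now be ACTIVE
    }
    // Option 2: initialize purely from configuration; both the table name and
    // the region must be set, as testInitializeWithConfiguration() shows.
    try (DynamoDBMetadataStore ms = new DynamoDBMetadataStore()) {
      ms.initialize(conf);
    }
  }
}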

View File

@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
/**
* MetadataStore unit test for {@link LocalMetadataStore}.
*/
public class TestLocalMetadataStore extends MetadataStoreTestBase {
private static final String MAX_ENTRIES_STR = "16";
  private static final class LocalMSContract extends AbstractMSContract {
private FileSystem fs;
private LocalMSContract() throws IOException {
this(new Configuration());
}
private LocalMSContract(Configuration config) throws IOException {
config.set(LocalMetadataStore.CONF_MAX_RECORDS, MAX_ENTRIES_STR);
fs = FileSystem.getLocal(config);
}
@Override
public FileSystem getFileSystem() {
return fs;
}
@Override
public MetadataStore getMetadataStore() throws IOException {
LocalMetadataStore lms = new LocalMetadataStore();
return lms;
}
}
@Override
public AbstractMSContract createContract() throws IOException {
return new LocalMSContract();
}
@Override
public AbstractMSContract createContract(Configuration conf) throws
IOException {
return new LocalMSContract(conf);
}
@Test
public void testClearByAncestor() {
Map<Path, PathMetadata> map = new HashMap<>();
// 1. Test paths without scheme/host
assertClearResult(map, "", "/", 0);
assertClearResult(map, "", "/dirA/dirB", 2);
assertClearResult(map, "", "/invalid", 5);
// 2. Test paths w/ scheme/host
String p = "s3a://fake-bucket-name";
assertClearResult(map, p, "/", 0);
assertClearResult(map, p, "/dirA/dirB", 2);
assertClearResult(map, p, "/invalid", 5);
}
private static void populateMap(Map<Path, PathMetadata> map,
String prefix) {
populateEntry(map, new Path(prefix + "/dirA/dirB/"));
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC"));
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file1"));
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file2"));
populateEntry(map, new Path(prefix + "/dirA/file1"));
}
private static void populateEntry(Map<Path, PathMetadata> map,
Path path) {
map.put(path, new PathMetadata(new FileStatus(0, true, 0, 0, 0, path)));
}
private static int sizeOfMap(Map<Path, PathMetadata> map) {
int count = 0;
for (PathMetadata meta : map.values()) {
if (!meta.isDeleted()) {
count++;
}
}
return count;
}
private static void assertClearResult(Map <Path, PathMetadata> map,
String prefixStr, String pathStr, int leftoverSize) {
populateMap(map, prefixStr);
LocalMetadataStore.deleteHashByAncestor(new Path(prefixStr + pathStr), map,
true);
assertEquals(String.format("Map should have %d entries", leftoverSize),
leftoverSize, sizeOfMap(map));
map.clear();
}
@Override
protected void verifyFileStatus(FileStatus status, long size) {
S3ATestUtils.verifyFileStatus(status, size, REPLICATION, getModTime(),
getAccessTime(),
BLOCK_SIZE, OWNER, GROUP, PERMISSION);
}
@Override
protected void verifyDirStatus(FileStatus status) {
S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(),
getAccessTime(), OWNER, GROUP, PERMISSION);
}
}
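For orientation, the MetadataStore operations these unit tests rely on (initialize, put, get, listChildren, move, deleteSubtree, close) can be strung together against the in-memory LocalMetadataStore roughly as follows. This is a sketch assembled only from calls that appear in this patch; the local filesystem and the /tmp paths are placeholders standing in for an S3A filesystem and real object-store paths.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
public class LocalMetadataStoreSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);   // stand-in for an S3AFileSystem
    try (LocalMetadataStore ms = new LocalMetadataStore()) {
      ms.initialize(fs);
      Path dir = new Path("/tmp/s3guard-sketch/dir");
      Path file = new Path(dir, "file1");
      // Record a directory and a file, mirroring what the unit tests do.
      ms.put(new PathMetadata(new FileStatus(0, true, 0, 0, 0, dir)));
      ms.put(new PathMetadata(new FileStatus(100, false, 1, 1, 0, file)));
      PathMetadata cached = ms.get(file);                 // single lookup
      DirListingMetadata listing = ms.listChildren(dir);  // directory listing
      // Move the file's entry, then clear the whole subtree.
      Path moved = new Path(dir, "file1-moved");
      ms.move(Arrays.asList(file),
          Arrays.asList(new PathMetadata(
              new FileStatus(100, false, 1, 1, 0, moved))));
      ms.deleteSubtree(new Path("/tmp/s3guard-sketch"));
      System.out.println((cached != null) + " " + listing.getListing().size());
    }
  }
}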

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
/**
* Run MetadataStore unit tests on the NullMetadataStore implementation.
*/
public class TestNullMetadataStore extends MetadataStoreTestBase {
private static class NullMSContract extends AbstractMSContract {
@Override
public FileSystem getFileSystem() throws IOException {
Configuration config = new Configuration();
return FileSystem.getLocal(config);
}
@Override
public MetadataStore getMetadataStore() throws IOException {
return new NullMetadataStore();
}
}
/** This MetadataStore always says "I don't know, ask the backing store". */
@Override
public boolean allowMissing() {
return true;
}
@Override
public AbstractMSContract createContract() {
return new NullMSContract();
}
@Override
public AbstractMSContract createContract(Configuration conf) {
return createContract();
}
}

View File

@ -0,0 +1,238 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.concurrent.Callable;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.google.common.base.Preconditions;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.LambdaTestUtils;
import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH;
import static com.amazonaws.services.dynamodbv2.model.KeyType.RANGE;
import static com.amazonaws.services.dynamodbv2.model.ScalarAttributeType.S;
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.is;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION_MARKER;
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION;
/**
 * Test that PathMetadataDynamoDBTranslation is able to translate between domain
* model objects and DynamoDB items.
*/
public class TestPathMetadataDynamoDBTranslation extends Assert {
private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
private static final Item TEST_DIR_ITEM = new Item();
private static PathMetadata testDirPathMetadata;
private static final long TEST_FILE_LENGTH = 100;
private static final long TEST_MOD_TIME = 9999;
private static final long TEST_BLOCK_SIZE = 128;
private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile");
private static final Item TEST_FILE_ITEM = new Item();
private static PathMetadata testFilePathMetadata;
@BeforeClass
public static void setUpBeforeClass() throws IOException {
String username = UserGroupInformation.getCurrentUser().getShortUserName();
testDirPathMetadata =
new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
TEST_DIR_ITEM
.withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
.withBoolean(IS_DIR, true);
testFilePathMetadata = new PathMetadata(
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
TEST_BLOCK_SIZE, username));
TEST_FILE_ITEM
.withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
CHILD, TEST_FILE_PATH.getName())
.withBoolean(IS_DIR, false)
.withLong(FILE_LENGTH, TEST_FILE_LENGTH)
.withLong(MOD_TIME, TEST_MOD_TIME)
.withLong(BLOCK_SIZE, TEST_BLOCK_SIZE);
}
/**
 * It should not take a long time as it doesn't involve remote server operations.
*/
@Rule
public final Timeout timeout = new Timeout(30 * 1000);
@Test
public void testKeySchema() {
final Collection<KeySchemaElement> keySchema =
PathMetadataDynamoDBTranslation.keySchema();
assertNotNull(keySchema);
assertEquals("There should be HASH and RANGE key in key schema",
2, keySchema.size());
for (KeySchemaElement element : keySchema) {
assertThat(element.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
assertThat(element.getKeyType(),
anyOf(is(HASH.toString()), is(RANGE.toString())));
}
}
@Test
public void testAttributeDefinitions() {
final Collection<AttributeDefinition> attrs =
PathMetadataDynamoDBTranslation.attributeDefinitions();
assertNotNull(attrs);
assertEquals("There should be HASH and RANGE attributes", 2, attrs.size());
for (AttributeDefinition definition : attrs) {
assertThat(definition.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
assertEquals(S.toString(), definition.getAttributeType());
}
}
@Test
public void testItemToPathMetadata() throws IOException {
final String user =
UserGroupInformation.getCurrentUser().getShortUserName();
assertNull(itemToPathMetadata(null, user));
verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
}
/**
* Verify that the Item and PathMetadata objects hold the same information.
*/
private static void verify(Item item, PathMetadata meta) {
assertNotNull(meta);
final FileStatus status = meta.getFileStatus();
final Path path = status.getPath();
assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
assertEquals(item.get(CHILD), path.getName());
boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
assertEquals(isDir, status.isDirectory());
long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
assertEquals(len, status.getLen());
long bSize = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
assertEquals(bSize, status.getBlockSize());
/*
     * S3AFileStatus#getModificationTime() reports the current time, so the
     * following assertion would fail.
*
* long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
* assertEquals(modTime, status.getModificationTime());
*/
}
@Test
public void testPathMetadataToItem() {
verify(pathMetadataToItem(testDirPathMetadata), testDirPathMetadata);
verify(pathMetadataToItem(testFilePathMetadata),
testFilePathMetadata);
}
@Test
public void testPathToParentKeyAttribute() {
doTestPathToParentKeyAttribute(TEST_DIR_PATH);
doTestPathToParentKeyAttribute(TEST_FILE_PATH);
}
private static void doTestPathToParentKeyAttribute(Path path) {
final KeyAttribute attr = pathToParentKeyAttribute(path);
assertNotNull(attr);
assertEquals(PARENT, attr.getName());
    // this path is expected as the parent field
assertEquals(pathToParentKey(path), attr.getValue());
}
private static String pathToParentKey(Path p) {
Preconditions.checkArgument(p.isUriPathAbsolute());
URI parentUri = p.toUri();
String bucket = parentUri.getHost();
Preconditions.checkNotNull(bucket);
String s = "/" + bucket + parentUri.getPath();
// strip trailing slash
if (s.endsWith("/")) {
s = s.substring(0, s.length()-1);
}
return s;
}
@Test
public void testPathToKey() throws Exception {
LambdaTestUtils.intercept(IllegalArgumentException.class,
new Callable<PrimaryKey>() {
@Override
public PrimaryKey call() throws Exception {
return pathToKey(new Path("/"));
}
});
doTestPathToKey(TEST_DIR_PATH);
doTestPathToKey(TEST_FILE_PATH);
}
private static void doTestPathToKey(Path path) {
final PrimaryKey key = pathToKey(path);
assertNotNull(key);
assertEquals("There should be both HASH and RANGE keys",
2, key.getComponents().size());
for (KeyAttribute keyAttribute : key.getComponents()) {
assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
if (PARENT.equals(keyAttribute.getName())) {
assertEquals(pathToParentKey(path.getParent()),
keyAttribute.getValue());
} else {
assertEquals(path.getName(), keyAttribute.getValue());
}
}
}
@Test
public void testVersionRoundTrip() throws Throwable {
final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
assertEquals("Extracted version from " + marker,
VERSION, extractVersionFromMarker(marker));
}
@Test
public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
assertNull("Path metadata fromfrom " + marker,
itemToPathMetadata(marker, "alice"));
}
}
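As a quick orientation to the translation helpers under test, a round-trip sketch follows. It assumes the same same-package access this test class relies on (the helpers are pulled in via static import from org.apache.hadoop.fs.s3a.s3guard); the path and owner values are placeholders, not values taken from the patch.
package org.apache.hadoop.fs.s3a.s3guard;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
/** Sketch: translate a file status to a DynamoDB item and back. */
public final class TranslationRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder path and owner.
    Path file = new Path("s3a://example-bucket/myDir/myFile");
    PathMetadata meta = new PathMetadata(
        new S3AFileStatus(100, 9999, file, 128, "hadoop"));
    Item item = pathMetadataToItem(meta);                     // domain -> item
    PathMetadata back = itemToPathMetadata(item, "hadoop");   // item -> domain
    PrimaryKey key = pathToKey(file);           // composite (parent, child) key
    System.out.println(item + " -> "
        + back.getFileStatus().getPath() + " key=" + key);
  }
}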

View File

@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.util.Arrays;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
/**
* Tests for the {@link S3Guard} utility class.
*/
public class TestS3Guard extends Assert {
/**
* Basic test to ensure results from S3 and MetadataStore are merged
* correctly.
*/
@Test
public void testDirListingUnion() throws Exception {
MetadataStore ms = new LocalMetadataStore();
Path dirPath = new Path("s3a://bucket/dir");
// Two files in metadata store listing
PathMetadata m1 = makePathMeta("s3a://bucket/dir/ms-file1", false);
PathMetadata m2 = makePathMeta("s3a://bucket/dir/ms-file2", false);
DirListingMetadata dirMeta = new DirListingMetadata(dirPath,
Arrays.asList(m1, m2), false);
// Two other files in s3
List<FileStatus> s3Listing = Arrays.asList(
makeFileStatus("s3a://bucket/dir/s3-file3", false),
makeFileStatus("s3a://bucket/dir/s3-file4", false)
);
FileStatus[] result = S3Guard.dirListingUnion(ms, dirPath, s3Listing,
dirMeta, false);
assertEquals("listing length", 4, result.length);
assertContainsPath(result, "s3a://bucket/dir/ms-file1");
assertContainsPath(result, "s3a://bucket/dir/ms-file2");
assertContainsPath(result, "s3a://bucket/dir/s3-file3");
assertContainsPath(result, "s3a://bucket/dir/s3-file4");
}
void assertContainsPath(FileStatus[] statuses, String pathStr) {
assertTrue("listing doesn't contain " + pathStr,
containsPath(statuses, pathStr));
}
boolean containsPath(FileStatus[] statuses, String pathStr) {
for (FileStatus s : statuses) {
if (s.getPath().toString().equals(pathStr)) {
return true;
}
}
return false;
}
private PathMetadata makePathMeta(String pathStr, boolean isDir) {
return new PathMetadata(makeFileStatus(pathStr, isDir));
}
private FileStatus makeFileStatus(String pathStr, boolean isDir) {
Path p = new Path(pathStr);
if (isDir) {
return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p);
} else {
return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p);
}
}
}

View File

@ -0,0 +1,250 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import static org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer;
/**
* Test the performance of a MetadataStore. Useful for load testing.
* Could be separated from S3A code, but we're using the S3A scale test
* framework for convenience.
*/
public abstract class AbstractITestS3AMetadataStoreScale extends
S3AScaleTestBase {
private static final Logger LOG = LoggerFactory.getLogger(
AbstractITestS3AMetadataStoreScale.class);
/** Some dummy values for FileStatus contents. */
static final long BLOCK_SIZE = 32 * 1024 * 1024;
static final long SIZE = BLOCK_SIZE * 2;
static final String OWNER = "bob";
static final long ACCESS_TIME = System.currentTimeMillis();
static final Path BUCKET_ROOT = new Path("s3a://fake-bucket/");
/**
 * Subclasses should override this to provide the MetadataStore they wish
* to test.
* @return MetadataStore to test against
* @throws IOException
*/
public abstract MetadataStore createMetadataStore() throws IOException;
@Test
public void testPut() throws Throwable {
describe("Test workload of put() operations");
// As described in hadoop-aws site docs, count parameter is used for
// width and depth of directory tree
int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
int depth = width;
List<PathMetadata> paths = new ArrayList<>();
createDirTree(BUCKET_ROOT, depth, width, paths);
long count = 1; // Some value in case we throw an exception below
try (MetadataStore ms = createMetadataStore()) {
try {
count = populateMetadataStore(paths, ms);
} finally {
clearMetadataStore(ms, count);
}
}
}
@Test
public void testMoves() throws Throwable {
describe("Test workload of batched move() operations");
// As described in hadoop-aws site docs, count parameter is used for
// width and depth of directory tree
int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
int depth = width;
long operations = getConf().getLong(KEY_OPERATION_COUNT,
DEFAULT_OPERATION_COUNT);
List<PathMetadata> origMetas = new ArrayList<>();
createDirTree(BUCKET_ROOT, depth, width, origMetas);
// Pre-compute source and destination paths for move() loop below
List<Path> origPaths = metasToPaths(origMetas);
List<PathMetadata> movedMetas = moveMetas(origMetas, BUCKET_ROOT,
new Path(BUCKET_ROOT, "moved-here"));
List<Path> movedPaths = metasToPaths(movedMetas);
long count = 1; // Some value in case we throw an exception below
try (MetadataStore ms = createMetadataStore()) {
try {
// Setup
count = populateMetadataStore(origMetas, ms);
// Main loop: move things back and forth
describe("Running move workload");
NanoTimer moveTimer = new NanoTimer();
LOG.info("Running {} moves of {} paths each", operations,
origMetas.size());
for (int i = 0; i < operations; i++) {
Collection<Path> toDelete;
Collection<PathMetadata> toCreate;
if (i % 2 == 0) {
toDelete = origPaths;
toCreate = movedMetas;
} else {
toDelete = movedPaths;
toCreate = origMetas;
}
ms.move(toDelete, toCreate);
}
moveTimer.end();
printTiming(LOG, "move", moveTimer, operations);
} finally {
// Cleanup
clearMetadataStore(ms, count);
}
}
}
/**
 * Create a copy of the given list of PathMetadatas with the paths moved from
* src to dest.
*/
private List<PathMetadata> moveMetas(List<PathMetadata> metas, Path src,
Path dest) throws IOException {
List<PathMetadata> moved = new ArrayList<>(metas.size());
for (PathMetadata srcMeta : metas) {
S3AFileStatus status = copyStatus((S3AFileStatus)srcMeta.getFileStatus());
status.setPath(movePath(status.getPath(), src, dest));
moved.add(new PathMetadata(status));
}
return moved;
}
private Path movePath(Path p, Path src, Path dest) {
String srcStr = src.toUri().getPath();
String pathStr = p.toUri().getPath();
// Strip off src dir
pathStr = pathStr.substring(srcStr.length());
// Prepend new dest
return new Path(dest, pathStr);
}
private S3AFileStatus copyStatus(S3AFileStatus status) {
if (status.isDirectory()) {
return new S3AFileStatus(status.isEmptyDirectory(), status.getPath(),
status.getOwner());
} else {
return new S3AFileStatus(status.getLen(), status.getModificationTime(),
status.getPath(), status.getBlockSize(), status.getOwner());
}
}
/** @return number of PathMetadatas put() into MetadataStore */
private long populateMetadataStore(Collection<PathMetadata> paths,
MetadataStore ms) throws IOException {
long count = 0;
NanoTimer putTimer = new NanoTimer();
describe("Inserting into MetadataStore");
for (PathMetadata p : paths) {
ms.put(p);
count++;
}
putTimer.end();
printTiming(LOG, "put", putTimer, count);
return count;
}
private void clearMetadataStore(MetadataStore ms, long count)
throws IOException {
describe("Recursive deletion");
NanoTimer deleteTimer = new NanoTimer();
ms.deleteSubtree(BUCKET_ROOT);
deleteTimer.end();
printTiming(LOG, "delete", deleteTimer, count);
}
private static void printTiming(Logger log, String op, NanoTimer timer,
long count) {
double msec = (double)timer.duration() / 1000;
double msecPerOp = msec / count;
log.info(String.format("Elapsed %.2f msec. %.3f msec / %s (%d ops)", msec,
msecPerOp, op, count));
}
private static S3AFileStatus makeFileStatus(Path path) throws IOException {
return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER);
}
private static S3AFileStatus makeDirStatus(Path p) throws IOException {
return new S3AFileStatus(false, p, OWNER);
}
private List<Path> metasToPaths(List<PathMetadata> metas) {
List<Path> paths = new ArrayList<>(metas.size());
for (PathMetadata meta : metas) {
paths.add(meta.getFileStatus().getPath());
}
return paths;
}
/**
* Recursively create a directory tree.
* @param parent Parent dir of the paths to create.
* @param depth How many more levels deep past parent to create.
* @param width Number of files (and directories, if depth > 0) per directory.
* @param paths List to add generated paths to.
*/
private static void createDirTree(Path parent, int depth, int width,
Collection<PathMetadata> paths) throws IOException {
// Create files
for (int i = 0; i < width; i++) {
Path p = new Path(parent, String.format("file-%d", i));
PathMetadata meta = new PathMetadata(makeFileStatus(p));
paths.add(meta);
}
if (depth == 0) {
return;
}
// Create directories if there is depth remaining
for (int i = 0; i < width; i++) {
Path dir = new Path(parent, String.format("dir-%d", i));
PathMetadata meta = new PathMetadata(makeDirStatus(dir));
paths.add(meta);
createDirTree(dir, depth-1, width, paths);
}
}
}

View File

@ -25,6 +25,7 @@ import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.event.ProgressListener;
import org.apache.hadoop.fs.FileStatus;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
@ -34,11 +35,9 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.Statistic;
@ -222,7 +221,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assertEquals("active put requests in \n" + fs,
0, gaugeValue(putRequestsActive));
ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
S3AFileStatus status = fs.getFileStatus(hugefile);
FileStatus status = fs.getFileStatus(hugefile);
ContractTestUtils.assertIsFile(hugefile, status);
assertEquals("File size in " + status, filesize, status.getLen());
if (progress != null) {
@ -324,7 +323,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
String filetype = encrypted ? "encrypted file" : "file";
describe("Positioned reads of %s %s", filetype, hugefile);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
int ops = 0;
final int bufferSize = 8192;
@ -364,7 +363,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assumeHugeFileExists();
describe("Reading %s", hugefile);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
long blocks = filesize / uploadBlockSize;
byte[] data = new byte[uploadBlockSize];
@ -390,7 +389,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
assumeHugeFileExists();
describe("renaming %s to %s", hugefile, hugefileRenamed);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
FileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
fs.delete(hugefileRenamed, false);
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
@ -401,7 +400,7 @@ public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
toHuman(timer.nanosPerOperation(mb)));
bandwidth(timer, filesize);
logFSState();
S3AFileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
FileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
assertEquals(filesize, destFileStatus.getLen());
// rename back

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import java.io.IOException;
import static org.junit.Assume.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
/**
* Scale test for DynamoDBMetadataStore.
*/
public class ITestDynamoDBMetadataStoreScale
extends AbstractITestS3AMetadataStoreScale {
@Override
public MetadataStore createMetadataStore() throws IOException {
Configuration conf = getFileSystem().getConf();
String ddbTable = conf.get(S3GUARD_DDB_TABLE_NAME_KEY);
assumeNotNull("DynamoDB table is configured", ddbTable);
String ddbRegion = conf.get(S3GUARD_DDB_REGION_KEY);
assumeNotNull("DynamoDB region is configured", ddbRegion);
DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
ms.initialize(getFileSystem().getConf());
return ms;
}
}
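Editorial aside: this test is skipped unless a DynamoDB table and region are configured, because of the assumeNotNull() guards above. A minimal sketch of a Configuration that would satisfy them; the constants are the ones the test reads, while the literal values are placeholders for illustration only.
import org.apache.hadoop.conf.Configuration;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
/** Editorial sketch only; not part of this commit. */
public class DynamoDBScaleTestConfigSketch {
  /** Build a Configuration that lets ITestDynamoDBMetadataStoreScale run. */
  public static Configuration exampleConf() {
    Configuration conf = new Configuration();
    conf.set(S3GUARD_DDB_TABLE_NAME_KEY, "example-s3guard-table"); // placeholder table name
    conf.set(S3GUARD_DDB_REGION_KEY, "us-west-2");                 // placeholder region
    return conf;
  }
}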

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import java.io.IOException;
/**
* Scale test for LocalMetadataStore.
*/
public class ITestLocalMetadataStoreScale
extends AbstractITestS3AMetadataStoreScale {
@Override
public MetadataStore createMetadataStore() throws IOException {
MetadataStore ms = new LocalMetadataStore();
ms.initialize(getFileSystem());
return ms;
}
}

View File

@ -107,7 +107,7 @@ public class ITestS3AConcurrentOps extends S3AScaleTestBase {
private S3AFileSystem getNormalFileSystem() throws Exception {
S3AFileSystem s3a = new S3AFileSystem();
Configuration conf = new Configuration();
Configuration conf = createScaleConfiguration();
URI rootURI = new URI(conf.get(TEST_FS_S3A_NAME));
s3a.initialize(rootURI, conf);
return s3a;
@ -115,6 +115,7 @@ public class ITestS3AConcurrentOps extends S3AScaleTestBase {
@After
public void teardown() throws Exception {
super.teardown();
if (auxFs != null) {
auxFs.delete(testRoot, true);
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.OutputStream;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
/**
* Tests for create(): performance and/or load testing.
*/
public class ITestS3ACreatePerformance extends S3AScaleTestBase {
private static final Logger LOG = LoggerFactory.getLogger(
ITestS3ACreatePerformance.class);
private Path basePath;
private int basePathDepth;
private static final int PATH_DEPTH = 10;
@Override
public void setup() throws Exception {
super.setup();
basePath = getTestPath();
basePathDepth = basePath.depth();
}
/**
* Test rate at which we can create deeply-nested files from a single thread.
* @throws Exception on any failure.
*/
@Test
public void testDeepSequentialCreate() throws Exception {
long numOperations = getOperationCount();
S3AFileSystem fs = getFileSystem();
NanoTimer timer = new NanoTimer();
for (int i = 0; i < numOperations; i++) {
Path p = getPathIteration(i, PATH_DEPTH);
OutputStream out = fs.create(p);
out.write(40); // one byte file with some value 40
out.close();
}
timer.end("Time to create %d files of depth %d", getOperationCount(),
PATH_DEPTH);
LOG.info("Time per create: {} msec",
timer.nanosPerOperation(numOperations) / 1000);
}
/* Get a unique path of depth totalDepth for given test iteration. */
private Path getPathIteration(long iter, int totalDepth) throws Exception {
assertTrue("Test path too long, increase PATH_DEPTH in test.",
totalDepth > basePathDepth);
int neededDirs = totalDepth - basePathDepth - 1;
StringBuilder sb = new StringBuilder();
for (int i = 0; i < neededDirs; i++) {
sb.append("iter-").append(iter);
sb.append("-dir-").append(i);
sb.append("/");
}
sb.append("file").append(iter);
return new Path(basePath, sb.toString());
}
}
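Editorial illustration of the layout getPathIteration() produces (depths assumed for the example): with a base path of depth 3 and PATH_DEPTH of 10, neededDirs is 10 - 3 - 1 = 6, so iteration 2 yields
    iter-2-dir-0/iter-2-dir-1/iter-2-dir-2/iter-2-dir-3/iter-2-dir-4/iter-2-dir-5/file2
relative to the base path: six intermediate directories plus the file, for a total depth of 10.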

View File

@ -113,14 +113,15 @@ public class ITestS3ADirectoryPerformance extends S3AScaleTestBase {
listContinueRequests,
listStatusCalls,
getFileStatusCalls);
assertEquals(listRequests.toString(), 2, listRequests.diff());
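// editorial note: with a metadata store enabled, listings may be served
// from S3Guard rather than raw S3 LIST calls, so the raw request count
// is only asserted when talking directly to S3.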
if (!fs.hasMetadataStore()) {
assertEquals(listRequests.toString(), 2, listRequests.diff());
}
reset(metadataRequests,
listRequests,
listContinueRequests,
listStatusCalls,
getFileStatusCalls);
} finally {
describe("deletion");
// deletion at the end of the run

View File

@ -20,10 +20,10 @@ package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInputPolicy;
import org.apache.hadoop.fs.s3a.S3AInputStream;
@ -56,7 +56,7 @@ public class ITestS3AInputStreamPerformance extends S3AScaleTestBase {
private S3AFileSystem s3aFS;
private Path testData;
private S3AFileStatus testDataStatus;
private FileStatus testDataStatus;
private FSDataInputStream in;
private S3AInstrumentation.InputStreamStatistics streamStatistics;
public static final int BLOCK_SIZE = 32 * 1024;

View File

@ -126,7 +126,7 @@ public class S3AScaleTestBase extends AbstractS3ATestBase {
* @return a configuration with which to create FS instances
*/
protected Configuration createScaleConfiguration() {
return new Configuration();
return super.createConfiguration();
}
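Editorial sketch, not part of the commit: a subclass can override createScaleConfiguration() to tune S3A options for its own scale runs; the class name and property value below are illustrative only.
import org.apache.hadoop.conf.Configuration;
class ITestExampleScaleSuite extends S3AScaleTestBase {
  @Override
  protected Configuration createScaleConfiguration() {
    Configuration conf = super.createScaleConfiguration();
    conf.setInt("fs.s3a.threads.max", 64); // illustrative tuning value
    return conf;
  }
}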
protected Path getTestPath() {

View File

@ -36,6 +36,25 @@
<description>The endpoint for s3a://landsat-pds URLs</description>
</property>
<!-- Make sure S3Guard is disabled for read-only bucket tests. -->
<property>
<name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
<value>${s3guard.null}</value>
<description>The read-only landsat-pds repository isn't
managed by s3guard</description>
</property>
<!-- Convenience definitions. -->
<property>
<name>s3guard.null</name>
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
</property>
<property>
<name>s3guard.dynamo</name>
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
</property>
<!--
This is the default endpoint, which can be used to interact
with any v2 region.
@ -110,6 +129,13 @@
<value>${central.endpoint}</value>
</property>
<!-- Scale integration tests may time out on slower connections
you can reduce the operation count like so to mitigate this.
<property>
<name>scale.test.operation.count</name>
<value>500</value>
</property>
-->
<!-- Turn security off for tests by default -->
<property>

Some files were not shown because too many files have changed in this diff.